/******************************************************************************* * =========================================================== * Ankush : Big Data Cluster Management Solution * =========================================================== * * (C) Copyright 2014, by Impetus Technologies * * This is free software; you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License (LGPL v3) as * published by the Free Software Foundation; * * This software is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * See the GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this software; if not, write to the Free Software Foundation, * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ******************************************************************************/ /** * */ package com.impetus.ankush2.hadoop.monitor; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; import net.sf.json.JSONArray; import com.impetus.ankush.common.exception.AnkushException; import com.impetus.ankush.common.utils.JsonMapperUtil; import com.impetus.ankush.common.utils.LogViewHandler; import com.impetus.ankush2.constant.Constant; import com.impetus.ankush2.framework.config.ClusterConfig; import com.impetus.ankush2.framework.config.ComponentConfig; import com.impetus.ankush2.hadoop.deployer.configurator.Hadoop1Configurator; import com.impetus.ankush2.hadoop.utils.HadoopConstants; import com.impetus.ankush2.hadoop.utils.HadoopUtils; import com.impetus.ankush2.utils.AnkushUtils; // TODO: Auto-generated Javadoc /** * The Class Hadoop1Monitor. * * @author Akhil */ public class Hadoop1Monitor extends HadoopMonitor { /** The Constant JMX_BEAN_NAME_JOBTRACKER_INFO. */ public static final String JMX_BEAN_NAME_JOBTRACKER_INFO = "Hadoop:service=JobTracker,name=JobTrackerInfo"; /** The Constant JMX_BEAN_NAME_METRICS_TASKTRACKER. */ public static final String JMX_BEAN_NAME_METRICS_TASKTRACKER = "Hadoop:service=TaskTracker,name=TaskTrackerMetrics"; /** The Constant JMX_BEAN_NAME_METRICS_JOBTRACKER. */ public static final String JMX_BEAN_NAME_METRICS_JOBTRACKER = "Hadoop:service=JobTracker,name=JobTrackerMetrics"; /** * Instantiates a new hadoop1 monitor. */ public Hadoop1Monitor() { super(); } /** * Instantiates a new hadoop1 monitor. * * @param clusterConfig * the cluster config * @param hadoopConfig * the hadoop config */ public Hadoop1Monitor(ClusterConfig clusterConfig, ComponentConfig hadoopConfig) { super(clusterConfig, hadoopConfig, Hadoop1Monitor.class); } /** * Gets the job metrics. * * @return the job metrics * @throws AnkushException * the ankush exception */ public Map<String, Object> getJobMetrics() throws AnkushException { String errMsg = "Could not fetch Hadoop job metrics."; Map<String, Object> jobMetricsMap = new HashMap<String, Object>(); try { String host = HadoopUtils.getJobTrackerHost(this.compConfig); if (!AnkushUtils.getServiceStatus(host, HadoopConstants.Roles.JOBTRACKER, Constant.Component.Name.HADOOP)) { throw new AnkushException( "Could not fetch Hadoop job metrics: JobTracker service on " + host + " host is down."); } JobStatusProvider jobStatusProvider = new JobStatusProvider( clusterConfig, compConfig); jobMetricsMap .put(HadoopConstants.Hadoop.Keys.MONITORING_JOBS_METRICS_DATA_KEY, jobStatusProvider.getJobMetrics()); } catch (AnkushException e) { throw e; } catch (Exception e) { HadoopUtils.addAndLogError(this.LOG, this.clusterConfig, errMsg, Constant.Component.Name.HADOOP, e); throw new AnkushException(errMsg); } return jobMetricsMap; } /** * Gets the job list. * * @return the job list * @throws AnkushException * the ankush exception */ public Map<String, Object> getJobList() throws AnkushException { String errMsg = "Could not fetch Hadoop job list."; Map<String, Object> jobListMap = new HashMap<String, Object>(); try { String host = HadoopUtils.getJobTrackerHost(this.compConfig); // if (!AnkushUtils.getServiceStatus(host, // HadoopConstants.Roles.JOBTRACKER, // Constant.Component.Name.HADOOP)) { // throw new AnkushException( // "Could not fetch Hadoop job list: JobTracker service on " // + host + " host is down."); // } JobStatusProvider jobStatusProvider = new JobStatusProvider( clusterConfig, compConfig); jobListMap.put( HadoopConstants.Hadoop.Keys.MONITORING_JOBS_DATA_KEY, jobStatusProvider.listAllJobs()); } catch (AnkushException e) { throw e; } catch (Exception e) { HadoopUtils.addAndLogError(this.LOG, this.clusterConfig, errMsg, Constant.Component.Name.HADOOP, e); throw new AnkushException(errMsg); } // String nodeIp = HadoopUtils.getJobTrackerHost(this.compConfig); // // try { // /** The node manager. */ // GenericManager<Node, Long> nodeManager = AppStoreWrapper // .getManager(Constant.Manager.NODE, Node.class); // // GenericManager<NodeMonitoring, Long> monitoringManager = // AppStoreWrapper // .getManager(Constant.Manager.MONITORING, NodeMonitoring.class); // // // Get the db node info using public IP // Node hadoopNode = nodeManager.getByPropertyValueGuarded("publicIp", // nodeIp); // if (hadoopNode != null) { // Long nodeId = hadoopNode.getId(); // // // Get the db node monitoring info // NodeMonitoring nodeMonitoring = monitoringManager // .getByPropertyValueGuarded(com.impetus.ankush2.constant.Constant.Keys.NODEID, // nodeId); // if (nodeMonitoring != null) { // String tileStatus = Constant.Tile.Status.NORMAL; // List<TileInfo> jobTiles = new ArrayList<TileInfo>(); // TileInfo ti = null; // Map<String, Boolean> serviceStatusMap = nodeMonitoring // .getServiceStatus(Constant.Component.Name.HADOOP); // // if (nodeMonitoring.isAgentDown()) { // throw new AnkushException("Ankush Agent is down on " + nodeIp // + " . Please resolve related issues."); // } else { // boolean status = false; // if (serviceStatusMap // .containsKey(HadoopConstants.Roles.JOBTRACKER)) { // status = serviceStatusMap // .get(HadoopConstants.Roles.JOBTRACKER); // } // if (status) { // HadoopMonitoringData technologyData = (HadoopMonitoringData) // nodeMonitoring // .getTechnologyData().get( // Constant.Component.Name.HADOOP); // // ArrayList<HadoopJob> jobInfos = technologyData // .getJobInfos(); // // for (HadoopJob job : jobInfos) { // job.setCounters(null); // job.setMapReport(null); // job.setSetupReport(null); // job.setReduceReport(null); // job.setCleanupReport(null); // } // // jobListMap.put(com.impetus.ankush2.constant.Constant.Keys.TILES, // technologyData.getJobTiles()); // jobListMap.put(com.impetus.ankush2.constant.Constant.Keys.JOBS, // jobInfos); // } else { // tileStatus = Constant.Tile.Status.ERROR; // // namenode down tile. // ti = new TileInfo(com.impetus.ankush2.constant.Constant.Keys.DOWN, // HadoopConstants.Roles.JOBTRACKER, // "Could not fetch job data.", null, // tileStatus, null); // jobTiles.add(ti); // jobListMap.put(com.impetus.ankush2.constant.Constant.Keys.TILES, // jobTiles); // } // } // } else { // throw new AnkushException("Ankush Agent is down on " + nodeIp // + " . Please resolve related issues."); // } // } return jobListMap; // } catch (AnkushException e) { // throw e; // } catch (Exception e) { // HadoopUtils.addAndLogError(this.LOG, this.clusterConfig, errMsg, // Constant.Component.Name.HADOOP, nodeIp, e); // throw new AnkushException(errMsg); // } } /* * (non-Javadoc) * * @see com.impetus.ankush2.hadoop.monitor.HadoopMonitor#getRoleNodesMap() */ @Override public Map<String, Set<String>> getRoleNodesMap() throws AnkushException { Map<String, Set<String>> roleNodesMap = new LinkedHashMap<String, Set<String>>(); try { roleNodesMap.put(HadoopConstants.Roles.NAMENODE, Collections .singleton(HadoopUtils.getNameNodeHost(this.compConfig))); roleNodesMap.put(HadoopConstants.Roles.JOBTRACKER, Collections .singleton(HadoopUtils.getJobTrackerHost(this.compConfig))); roleNodesMap.put(HadoopConstants.Roles.DATANODE, HadoopUtils.getSlaveHosts(this.compConfig)); roleNodesMap.put(HadoopConstants.Roles.TASKTRACKER, HadoopUtils.getSlaveHosts(this.compConfig)); if (HadoopUtils.getSecondaryNameNodeHost(this.compConfig) != null) { roleNodesMap.put(HadoopConstants.Roles.NAMENODE, Collections .singleton(HadoopUtils .getSecondaryNameNodeHost(this.compConfig))); } } catch (Exception e) { String errMsg = "Could not fetch roles for Hadoop nodes."; HadoopUtils.addAndLogError(this.LOG, this.clusterConfig, errMsg, Constant.Component.Name.HADOOP, e); throw new AnkushException(errMsg); } return roleNodesMap; } /* * (non-Javadoc) * * @see * com.impetus.ankush2.hadoop.monitor.HadoopMonitor#getLogFilesList(java * .lang.String, java.lang.String) */ @Override public Map<String, String> getLogFilesList(String role, String host) throws AnkushException { String errMsg = "Could not fetch log files for " + role + "."; try { LogViewHandler logHandler = new LogViewHandler(host, this.clusterConfig.getAuthConf()); String logsDirectory = this.compConfig.getHomeDir() + Hadoop1Configurator.RELPATH_LOGS_DIR; // get the list of all log files for a particular role List<String> logFilesList = logHandler.getLogFilesList( logsDirectory, role); if (logFilesList.isEmpty()) { throw new AnkushException(errMsg += " " + logsDirectory + " does not contain logs for " + role + "."); } Map<String, String> logFilesMap = new HashMap<String, String>(); for (String logFile : logFilesList) { logFilesMap.put(logFile, logsDirectory + logFile); } return logFilesMap; } catch (AnkushException e) { throw e; } catch (Exception e) { HadoopUtils.addAndLogError(this.LOG, this.clusterConfig, errMsg, Constant.Component.Name.HADOOP, host, e); throw new AnkushException(errMsg); } } /* * (non-Javadoc) * * @see * com.impetus.ankush2.hadoop.monitor.HadoopMonitor#getMapReduceSummary() */ @Override public LinkedHashMap<String, String> getMapReduceMetrics() throws AnkushException { String errMsg = "Could not get MapReduce summary."; try { LinkedHashMap<String, String> mapReduceSummaryMap = new LinkedHashMap<String, String>(); String beanName = Hadoop1Monitor.JMX_BEAN_NAME_METRICS_JOBTRACKER; String jobTrackerHost = HadoopUtils .getJobTrackerHost(this.compConfig); String httpPort = HadoopUtils .getJobTrackerHttpPort(this.compConfig); String jobTrackerUiUrl = "http://" + jobTrackerHost + ":" + httpPort + "/"; Map<String, Object> jobTrackerMetricsBeanObject = HadoopUtils .getJmxBeanUsingCallable(jobTrackerHost, httpPort, beanName); if (jobTrackerMetricsBeanObject != null) { mapReduceSummaryMap = this .getMapRedClusterInfoFromBean(jobTrackerMetricsBeanObject); String jobTrackerUiKey = HadoopConstants.Hadoop.Keys.JobTrackerJmxKeyDisplayName .getKeyDisplayName(HadoopConstants.Hadoop.Keys.JobTrackerJmxInfo.JOBTRACKERUI); mapReduceSummaryMap.put(jobTrackerUiKey, jobTrackerUiUrl); } else { throw new AnkushException("Could not connect to url-" + jobTrackerUiUrl); } return mapReduceSummaryMap; } catch (AnkushException e) { throw e; } catch (Exception e) { HadoopUtils.addAndLogError(this.LOG, this.clusterConfig, errMsg, Constant.Component.Name.HADOOP, e); throw new AnkushException(errMsg); } } /** * Gets the map red cluster info from bean. * * @param jobtrackerMetricsBeanObject * the jobtracker metrics bean object * @return the map red cluster info from bean */ private static LinkedHashMap<String, String> getMapRedClusterInfoFromBean( Map<String, Object> jobtrackerMetricsBeanObject) { LinkedHashMap<String, String> mapredInfo = new LinkedHashMap<String, String>(); List<String> mapredKeys = new ArrayList<String>(); mapredKeys .add(HadoopConstants.Hadoop.Keys.JobTrackerJmxInfo.JOBS_RUNNING); mapredKeys .add(HadoopConstants.Hadoop.Keys.JobTrackerJmxInfo.JOBS_COMPLETED); // mapredKeys.add(Constant.Hadoop.Keys.JobTrackerJmxInfo.JOBS_SUBMITTED); mapredKeys .add(HadoopConstants.Hadoop.Keys.JobTrackerJmxInfo.SLOTS_TOTAL_MAP); mapredKeys .add(HadoopConstants.Hadoop.Keys.JobTrackerJmxInfo.SLOTS_TOTAL_REDUCE); // mapredKeys.add(Constant.Hadoop.Keys.JobTrackerJmxInfo.TRACKERS_TOTAL); if (jobtrackerMetricsBeanObject != null) { for (String mapredkey : mapredKeys) { String key = HadoopConstants.Hadoop.Keys.JobTrackerJmxKeyDisplayName .getKeyDisplayName(mapredkey); String value = String .valueOf(((Number) jobtrackerMetricsBeanObject .get(mapredkey))); mapredInfo.put(key, value); } } return mapredInfo; } /* * (non-Javadoc) * * @see com.impetus.ankush2.hadoop.monitor.HadoopMonitor#getNodesSummary() */ @Override public LinkedHashMap<String, String> getNodesSummary() throws AnkushException { String errMsg = "Could not get Nodes summary."; try { LinkedHashMap<String, String> nodesSummaryMap = new LinkedHashMap<String, String>(); nodesSummaryMap.put(HadoopConstants.Roles.NAMENODE, HadoopUtils.getNameNodeHost(this.compConfig)); if (HadoopUtils.getSecondaryNameNodeHost(this.compConfig) != null) { nodesSummaryMap.put(HadoopConstants.Roles.SECONDARYNAMENODE, HadoopUtils.getSecondaryNameNodeHost(this.compConfig)); } else { nodesSummaryMap.put(HadoopConstants.Roles.SECONDARYNAMENODE, HadoopMonitor.STRING_EMPTY_VALUE); } nodesSummaryMap.put(HadoopConstants.Roles.JOBTRACKER, HadoopUtils.getJobTrackerHost(this.compConfig)); nodesSummaryMap .put(HadoopConstants.Hadoop.Keys.NodesSummary.COUNT_LIVE_DATANODES, this.getLiveDataNodesCount()); nodesSummaryMap .put(HadoopConstants.Hadoop.Keys.NodesSummary.COUNT_ACTIVE_TASKTRACKERS, this.getActiveMapRedNodesCount()); return nodesSummaryMap; } catch (Exception e) { HadoopUtils.addAndLogError(this.LOG, this.clusterConfig, errMsg, Constant.Component.Name.HADOOP, e); throw new AnkushException(errMsg); } } /* * (non-Javadoc) * * @see * com.impetus.ankush2.hadoop.monitor.HadoopMonitor#getActiveMapRedNodesCount * () */ @Override public String getActiveMapRedNodesCount() { try { String jobtrackerHost = HadoopUtils .getJobTrackerHost(this.compConfig); String jobtrackerHttpPort = HadoopUtils .getJobTrackerHttpPort(this.compConfig); String beanName = Hadoop1Monitor.JMX_BEAN_NAME_JOBTRACKER_INFO; Map<String, Object> beanObject = HadoopUtils .getJmxBeanUsingCallable(jobtrackerHost, jobtrackerHttpPort, beanName); if (beanObject != null) { String strActiveNodesInfo = String.valueOf(beanObject .get("AliveNodesInfoJson")); JSONArray jsonArrayActiveNodeInfo = JsonMapperUtil .objectFromString(strActiveNodesInfo, JSONArray.class); if (jsonArrayActiveNodeInfo != null) { return String.valueOf(jsonArrayActiveNodeInfo.size()); } else { HadoopUtils.addAndLogError(LOG, clusterConfig, "Could not get Active TaskTracker count for Host-" + jobtrackerHost + ", Port-" + jobtrackerHttpPort + ".", Constant.Component.Name.HADOOP, jobtrackerHost); } } } catch (AnkushException e) { HadoopUtils.addAndLogError(LOG, clusterConfig, e.getMessage(), Constant.Component.Name.HADOOP, e); } catch (Exception e) { HadoopUtils.addAndLogError(LOG, clusterConfig, "Could not get Active TaskTracker count.", Constant.Component.Name.HADOOP, e); } return HadoopMonitor.STRING_EMPTY_VALUE; } @Override public LinkedHashMap<String, String> getMapReduceProcessSummary() throws AnkushException { String errMsg = "Could not get JobTracker process summary."; try { String jobtrackerHost = HadoopUtils .getJobTrackerHost(this.compConfig); String jobtrackerHttpPort = HadoopUtils .getJobTrackerHttpPort(this.compConfig); return getProcessSummaryFromJmx(jobtrackerHost, jobtrackerHttpPort); } catch (AnkushException e) { throw e; } catch (Exception e) { HadoopUtils.addAndLogError(LOG, clusterConfig, errMsg, Constant.Component.Name.HADOOP, e); throw new AnkushException(errMsg); } } @Override public LinkedHashMap<String, String> getProcessSummary(String process) throws AnkushException { if(process.equals(HadoopConstants.Roles.NAMENODE)) { return this.getNameNodeProcessSummary(); } else if(process.equals(HadoopConstants.Roles.JOBTRACKER)) { return this.getMapReduceProcessSummary(); } else { throw new AnkushException("Could not get process summary for " + process + "."); } } public LinkedHashMap<String, Object> getJobDetails(String jobId) throws AnkushException,Exception { String errMsg = "Could not fetch Hadoop job details for " + jobId + "."; LinkedHashMap<String, Object> jobListMap = new LinkedHashMap<String, Object>(); try { String host = HadoopUtils.getJobTrackerHost(this.compConfig); if (!AnkushUtils.getServiceStatus(host, HadoopConstants.Roles.JOBTRACKER, Constant.Component.Name.HADOOP)) { throw new AnkushException( "Could not fetch Hadoop job details: JobTracker service on " + host + " host is down."); } JobStatusProvider jobStatusProvider = new JobStatusProvider( clusterConfig, compConfig); jobListMap.put( HadoopConstants.Hadoop.Keys.MONITORING_JOBS_DATA_KEY, jobStatusProvider.getJobStatus(jobId)); } catch (AnkushException e) { throw e; } catch (Exception e) { HadoopUtils.addAndLogError(this.LOG, this.clusterConfig, errMsg, Constant.Component.Name.HADOOP, e); throw new AnkushException(errMsg); } return jobListMap; } }