/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.contrib.failmon; import java.net.InetAddress; import java.util.ArrayList; import java.util.Calendar; import java.util.regex.Matcher; import java.util.regex.Pattern; /********************************************************** * Objects of this class parse the output of smartmontools to * gather information about the state of disks in the system. The * smartmontools utility reads the S.M.A.R.T. attributes from * the disk devices and reports them to the user. Note that since * running smartctl requires superuser privileges, one should * grant sudo privileges to the running user for the command smartctl * (without a password). Alternatively, one can set up a cron job that * periodically dumps the output of smartctl into a user-readable file. * See the configuration file for details. 
* **********************************************************/ public class SMARTParser extends ShellParser { String[] devices; /** * Constructs a SMARTParser and reads the list of disk * devices to query */ public SMARTParser() { super(); String devicesStr = Environment.getProperty("disks.list"); System.out.println("skato " + devicesStr); if (devicesStr != null) devices = devicesStr.split(",\\s*"); } /** * Reads and parses the output of smartctl for a specified disk and * creates an appropriate EventRecord that holds the desirable * information for it. Since the output of smartctl is different for * different kinds of disks, we try to identify as many attributes as * posssible for all known output formats. * * @param device the disk device name to query * * @return the EventRecord created */ public EventRecord query(String device) throws Exception { String conf = Environment.getProperty("disks." + device + ".source"); StringBuffer sb; if (conf == null) sb = Environment.runCommand("sudo smartctl --all " + device); else sb = Environment.runCommand("cat " + conf); EventRecord retval = new EventRecord(InetAddress.getLocalHost() .getCanonicalHostName(), InetAddress.getAllByName(InetAddress.getLocalHost() .getHostName()), Calendar.getInstance(), "SMART", "Unknown", (conf == null ? 
"sudo smartctl --all " + device : "file " + conf), "-"); // IBM SCSI disks retval.set("model", findPattern("Device\\s*:\\s*(.*)", sb.toString(), 1)); retval.set("serial", findPattern("Serial\\s+Number\\s*:\\s*(.*)", sb .toString(), 1)); retval.set("firmware", findPattern("Firmware\\s+Version\\s*:\\s*(.*)", sb .toString(), 1)); retval.set("capacity", findPattern("User\\s+Capacity\\s*:\\s*(.*)", sb .toString(), 1)); retval.set("status", findPattern("SMART\\s*Health\\s*Status:\\s*(.*)", sb .toString(), 1)); retval.set("current_temperature", findPattern( "Current\\s+Drive\\s+Temperature\\s*:\\s*(.*)", sb.toString(), 1)); retval.set("trip_temperature", findPattern( "Drive\\s+Trip\\s+Temperature\\s*:\\s*(.*)", sb.toString(), 1)); retval.set("start_stop_count", findPattern( "start\\s+stop\\s+count\\s*:\\s*(\\d*)", sb.toString(), 1)); String[] var = { "read", "write", "verify" }; for (String s : var) { retval.set(s + "_ecc_fast", findPattern(s + "\\s*:\\s*(\\d*)", sb .toString(), 1)); retval.set(s + "_ecc_delayed", findPattern(s + "\\s*:\\s*(\\d+\\s+){1}(\\d+)", sb.toString(), 2)); retval.set(s + "_rereads", findPattern( s + "\\s*:\\s*(\\d+\\s+){2}(\\d+)", sb.toString(), 2)); retval.set(s + "_GBs", findPattern(s + "\\s*:\\s*(\\d+\\s+){5}(\\d+.?\\d*)", sb.toString(), 2)); retval.set(s + "_uncorrected", findPattern(s + "\\s*:\\s*(\\d+\\s+){5}(\\d+.?\\d*){1}\\s+(\\d+)", sb .toString(), 3)); } // Hitachi IDE, SATA retval.set("model", findPattern("Device\\s*Model\\s*:\\s*(.*)", sb .toString(), 1)); retval.set("serial", findPattern("Serial\\s+number\\s*:\\s*(.*)", sb .toString(), 1)); retval.set("protocol", findPattern("Transport\\s+protocol\\s*:\\s*(.*)", sb .toString(), 1)); retval.set("status", "PASSED".equalsIgnoreCase(findPattern( "test\\s*result\\s*:\\s*(.*)", sb.toString(), 1)) ? 
"OK" : "FAILED"); readColumns(retval, sb); return retval; } /** * Reads attributes in the following format: * * ID# ATTRIBUTE_NAME FLAG VALUE WORST THRESH TYPE UPDATED WHEN_FAILED RAW_VALUE * 3 Spin_Up_Time 0x0027 180 177 063 Pre-fail Always - 10265 * 4 Start_Stop_Count 0x0032 253 253 000 Old_age Always - 34 * 5 Reallocated_Sector_Ct 0x0033 253 253 063 Pre-fail Always - 0 * 6 Read_Channel_Margin 0x0001 253 253 100 Pre-fail Offline - 0 * 7 Seek_Error_Rate 0x000a 253 252 000 Old_age Always - 0 * 8 Seek_Time_Performance 0x0027 250 224 187 Pre-fail Always - 53894 * 9 Power_On_Minutes 0x0032 210 210 000 Old_age Always - 878h+00m * 10 Spin_Retry_Count 0x002b 253 252 157 Pre-fail Always - 0 * 11 Calibration_Retry_Count 0x002b 253 252 223 Pre-fail Always - 0 * 12 Power_Cycle_Count 0x0032 253 253 000 Old_age Always - 49 * 192 PowerOff_Retract_Count 0x0032 253 253 000 Old_age Always - 0 * 193 Load_Cycle_Count 0x0032 253 253 000 Old_age Always - 0 * 194 Temperature_Celsius 0x0032 037 253 000 Old_age Always - 37 * 195 Hardware_ECC_Recovered 0x000a 253 252 000 Old_age Always - 2645 * * This format is mostly found in IDE and SATA disks. * * @param er the EventRecord in which to store attributes found * @param sb the StringBuffer with the text to parse * * @return the EventRecord in which new attributes are stored. */ private EventRecord readColumns(EventRecord er, StringBuffer sb) { Pattern pattern = Pattern.compile("^\\s{0,2}(\\d{1,3}\\s+.*)$", Pattern.MULTILINE); Matcher matcher = pattern.matcher(sb); while (matcher.find()) { String[] tokens = matcher.group(1).split("\\s+"); boolean failed = false; // check if this attribute is a failed one if (!tokens[8].equals("-")) failed = true; er.set(tokens[1].toLowerCase(), (failed ? "FAILED:" : "") + tokens[9]); } return er; } /** * Invokes query() to do the parsing and handles parsing errors for * each one of the disks specified in the configuration. 
* * @return an array of EventRecords that holds one element that represents * the current state of the disk devices. */ public EventRecord[] monitor() { ArrayList<EventRecord> recs = new ArrayList<EventRecord>(); for (String device : devices) { try { recs.add(query(device)); } catch (Exception e) { e.printStackTrace(); } } EventRecord[] T = new EventRecord[recs.size()]; return recs.toArray(T); } /** * Return a String with information about this class * * @return A String describing this class */ public String getInfo() { String retval = "S.M.A.R.T. disk attributes parser for disks "; for (String device : devices) retval += device + " "; return retval; } }