/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.raid;
import java.io.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Map;
import java.util.TreeMap;
import java.net.URLEncoder;
import javax.servlet.ServletContext;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hdfs.util.InjectionEvent;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.raid.DistBlockIntegrityMonitor.CorruptFile;
import org.apache.hadoop.raid.DistBlockIntegrityMonitor.CorruptFileStatus;
import org.apache.hadoop.raid.DistBlockIntegrityMonitor.Worker;
import org.apache.hadoop.raid.RaidHistogram.BlockFixStatus;
import org.apache.hadoop.raid.RaidHistogram.Point;
import org.apache.hadoop.util.InjectionHandler;
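/**
 * Servlet hosted in the RaidNode's Jetty server that reports corrupt file
 * counters (for example to the namenode). Depending on the request
 * parameters, the handlers in this class print either a single plain-text
 * counter for one directory or HTML tables listing the per-directory
 * counters and the affected files.
 */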
public class CorruptFileCounterServlet extends HttpServlet {
/** Logger for this servlet; used to report failures while rendering the warning text. */
public static final Log LOG = LogFactory.getLog(CorruptFileCounterServlet.class);
private static final long serialVersionUID = 1L;
/**
 * Name of the request parameter that doGet reads to select the directory
 * whose unrecoverable-file counter is printed.
 */
final String CORRUPT_DIR_KEY = "path";
/**
 * Corrupt-file statuses that generateTable renders, in this order, as the
 * counter columns of each directory row.
 */
public static CorruptFileStatus[] columns = new CorruptFileStatus[] {
CorruptFileStatus.RAID_UNRECOVERABLE,
CorruptFileStatus.NOT_RAIDED_UNRECOVERABLE,
CorruptFileStatus.POTENTIALLY_CORRUPT,
CorruptFileStatus.RECOVERABLE
};
/**
 * Wraps the given text in an HTML anchor that carries the "warning" CSS class.
 * Both arguments are inserted into the markup verbatim (no escaping).
 *
 * @param url  value placed in the {@code href} attribute
 * @param text visible link text
 * @return the anchor element as a string
 */
public static String getHTMLLinksText(String url, String text) {
  StringBuilder anchor = new StringBuilder("<a class=\"warning\" href=\"");
  anchor.append(url);
  anchor.append("\">");
  anchor.append(text);
  anchor.append("</a>");
  return anchor.toString();
}
/**
 * Builds the recovery-lag cell text for one time window: the formatted window
 * length, a space, and then one "/"-separated entry per element of the
 * status' {@code percents} list.
 *
 * An entry is a coloured link to
 * {@code /corruptfilecounter?root=<path>&recoverytime=<value>} when
 * {@code percentValues} is present and the value is non-negative; otherwise
 * it is "-".
 *
 * @param window      key of the window to look up in {@code countersMap}
 * @param countersMap block-fix status per window
 * @param path        monitored directory, URL-encoded into the link
 * @param infoAddr    host:port of the info server used as the link target
 * @return the cell text described above
 * @throws UnsupportedEncodingException if UTF-8 encoding is unavailable
 */
public static String getRecoveryLag(long window,
    TreeMap<Long, BlockFixStatus> countersMap, String path,
    String infoAddr) throws UnsupportedEncodingException {
  BlockFixStatus status = countersMap.get(window);
  StringBuilder cell = new StringBuilder(format(window)).append(" ");
  int numEntries = status.percents.size();
  for (int idx = 0; idx < numEntries; idx++) {
    if (idx != 0) {
      cell.append("/");
    }
    boolean hasValue =
        status.percentValues != null && status.percentValues[idx] >= 0;
    if (!hasValue) {
      cell.append("-");
      continue;
    }
    String value = String.valueOf(status.percentValues[idx]);
    String link = "http://" + infoAddr + "/corruptfilecounter"
        + "?root=" + URLEncoder.encode(path, "UTF-8")
        + "&recoverytime=" + value;
    cell.append(JspUtils.linkWithColor(value, idx, link));
  }
  return cell.toString();
}
/**
 * Builds the "recovery failed files" cell text for one time window.
 *
 * @param window      key of the window to look up in {@code countersMap}
 * @param countersMap block-fix status per window
 * @param path        monitored directory, URL-encoded into the link
 * @param infoAddr    host:port of the info server used as the link target
 * @return "0" when the window has no failed paths; otherwise a warning link
 *         showing the failed-path count and pointing at
 *         {@code /corruptfilecounter?root=<path>&recoverytime=RECOVERY_FAIL}
 * @throws UnsupportedEncodingException if UTF-8 encoding is unavailable
 */
public static String getFailedFiles(long window,
    TreeMap<Long, BlockFixStatus> countersMap, String path,
    String infoAddr) throws UnsupportedEncodingException {
  BlockFixStatus status = countersMap.get(window);
  if (status.failedPaths <= 0) {
    return "0";
  }
  String link = "http://" + infoAddr + "/corruptfilecounter"
      + "?root=" + URLEncoder.encode(path, "UTF-8")
      + "&recoverytime=" + RaidHistogram.RECOVERY_FAIL;
  return getHTMLLinksText(link, String.valueOf(status.failedPaths));
}
/**
 * Renders the percentile legend shown next to the recovery-lag column icon.
 * Each percent string from the block integrity monitor is coloured by its
 * position and joined with "/"; "0" is displayed as "min" and "100" as "max".
 *
 * @param raidNode node whose block integrity monitor supplies the percent strings
 * @return the "/"-separated, coloured legend
 */
public static String getPercentHeader(RaidNode raidNode) {
  String[] labels = raidNode.getBlockIntegrityMonitor().getPercentStrs();
  StringBuilder legend = new StringBuilder();
  int position = 0;
  for (String label : labels) {
    if (position > 0) {
      legend.append("/");
    }
    String shown;
    if (label.equals("0")) {
      shown = "min";
    } else if (label.equals("100")) {
      shown = "max";
    } else {
      shown = label;
    }
    legend.append(JspUtils.color(position, shown));
    position++;
  }
  return legend.toString();
}
/**
 * Renders the corrupt-file summary table.
 *
 * The first row is a header of icons. Then, for every monitored directory in
 * {@code corruptFilesCounterMap}, one row holds the directory name, one
 * counter per entry of {@code columns}, the detection lag, and the recovery
 * lag / failed files of the first window. The remaining windows follow as
 * extra rows with just the two window-specific cells; the directory-level
 * cells are given the number of windows as their first {@code JspUtils.th}
 * argument.
 *
 * @param corruptFilesCounterMap counters per monitored directory
 * @param infoAddr               host:port of the info server used in links
 * @param numDetectionsPerSec    detection throughput used for the lag estimate
 * @param raidNode               node supplying images and block-fix statistics
 * @return the table markup produced by {@code JspUtils.smallTable}
 * @throws UnsupportedEncodingException if UTF-8 encoding is unavailable
 * @throws IOException declared by the signature for callers
 */
public static String generateTable(
    Map<String, Map<CorruptFileStatus, Long>> corruptFilesCounterMap,
    String infoAddr, double numDetectionsPerSec, RaidNode raidNode)
    throws UnsupportedEncodingException, IOException {
  final int iconSize = 30;
  StringBuilder table = new StringBuilder();

  // Header row: one icon cell per column, in display order.
  StringBuilder header = new StringBuilder();
  header.append(JspUtils.td("Root Directory"));
  header.append(JspUtils.td(
      JspUtils.image(raidNode, "RURF.jpg", iconSize, iconSize),
      "Raid Unrecoverable Files"));
  header.append(JspUtils.td(
      JspUtils.image(raidNode, "NRURF.jpg", iconSize, iconSize),
      "Not-Raid Unrecoverable Files"));
  header.append(JspUtils.td(
      JspUtils.image(raidNode, "PURF.jpg", iconSize, iconSize),
      "Potential Unrecoverable Files"));
  header.append(JspUtils.td(
      JspUtils.image(raidNode, "RF.jpg", iconSize, iconSize),
      "Recoverable Files"));
  header.append(JspUtils.td(
      JspUtils.image(raidNode, "DL.jpg", iconSize, iconSize),
      "Detection Lag(s)"));
  header.append(JspUtils.td(
      JspUtils.image(raidNode, "RL.jpg", iconSize, iconSize)
          + " " + getPercentHeader(raidNode),
      "Recovery Lag(s)"));
  header.append(JspUtils.td(
      JspUtils.image(raidNode, "RFF.jpg", iconSize, iconSize),
      "Recovery Failed Files"));
  table.append(JspUtils.tr(header.toString()));

  for (String path : corruptFilesCounterMap.keySet()) {
    Map<CorruptFileStatus, Long> counters = corruptFilesCounterMap.get(path);
    TreeMap<Long, BlockFixStatus> windows =
        raidNode.getBlockIntegrityMonitor().getBlockFixStatus(
            path, System.currentTimeMillis());
    // Directory-level cells span one row per window.
    int rowSpan = windows.size();

    StringBuilder firstRow = new StringBuilder();
    firstRow.append(JspUtils.th(rowSpan, path));

    // Corrupt file counters: plain "0" or a link to the file listing.
    for (CorruptFileStatus column : columns) {
      Long count = counters.get(column);
      String cell = "0";
      if (count != null && count > 0) {
        String link = "http://" + infoAddr + "/corruptfilecounter"
            + "?root=" + URLEncoder.encode(path, "UTF-8")
            + "&status=" + URLEncoder.encode(column.name(), "UTF-8");
        cell = getHTMLLinksText(link, String.valueOf(count));
      }
      firstRow.append(JspUtils.th(rowSpan, cell));
    }

    // Detection lag: seconds needed to check all potentially corrupt files.
    Long potential = counters.get(CorruptFileStatus.POTENTIALLY_CORRUPT);
    String detectionLag;
    if (potential == null || potential <= 0) {
      detectionLag = "0";
    } else if (numDetectionsPerSec < 1e-6) {
      detectionLag = "-";
    } else {
      detectionLag = Long.toString(
          (long) Math.ceil(potential.doubleValue() / numDetectionsPerSec));
    }
    firstRow.append(JspUtils.th(rowSpan, detectionLag));

    // The first window shares the row with the directory-level cells.
    Long firstWindow = windows.firstKey();
    firstRow.append(JspUtils.tdWithClass(
        getRecoveryLag(firstWindow, windows, path, infoAddr),
        JspUtils.SMALL_CELL));
    firstRow.append(JspUtils.tdWithClass(
        getFailedFiles(firstWindow, windows, path, infoAddr),
        JspUtils.SMALL_CELL));
    table.append(JspUtils.tr(firstRow.toString()));

    // Every later window gets a row of its own.
    for (Long window : windows.tailMap(firstWindow, false).keySet()) {
      StringBuilder windowRow = new StringBuilder();
      windowRow.append(JspUtils.tdWithClass(
          getRecoveryLag(window, windows, path, infoAddr),
          JspUtils.SMALL_CELL));
      windowRow.append(JspUtils.tdWithClass(
          getFailedFiles(window, windows, path, infoAddr),
          JspUtils.SMALL_CELL));
      table.append(JspUtils.tr(windowRow.toString()));
    }
  }
  return JspUtils.smallTable(table.toString());
}
/**
 * Formats a duration as whole weeks, days, hours and minutes, e.g.
 * "1weeks 2days 3hours 4mins". Units whose value is not positive are
 * omitted, so durations under one minute yield the empty string.
 *
 * @param msec duration in milliseconds
 * @return the space-separated textual form, without trailing whitespace
 */
public static String format(long msec) {
  long totalMinutes = msec / 60 / 1000;
  long[] amounts = {
    totalMinutes / (7 * 24 * 60),
    (totalMinutes / (24 * 60)) % 7,
    (totalMinutes / 60) % 24,
    totalMinutes % 60
  };
  String[] suffixes = {"weeks ", "days ", "hours ", "mins "};
  StringBuilder text = new StringBuilder();
  for (int i = 0; i < amounts.length; i++) {
    if (amounts[i] > 0) {
      text.append(amounts[i]).append(suffixes[i]);
    }
  }
  return text.toString().trim();
}
/**
 * Prints the "WARNING Corrupt files:" link followed by the summary table.
 * Nothing is printed when the RaidNode has no info server. Any exception
 * raised while building the output is logged and otherwise ignored.
 *
 * @param out                    writer receiving the markup
 * @param corruptFilesCounterMap counters per monitored directory
 * @param raidNode               node supplying host name, info server and statistics
 */
public static void generateWarningText(PrintWriter out,
    Map<String, Map<CorruptFileStatus, Long>> corruptFilesCounterMap,
    RaidNode raidNode) {
  if (raidNode.getInfoServer() == null) {
    return;
  }
  String infoAddr =
      raidNode.getHostName() + ":" + raidNode.getInfoServer().getPort();
  try {
    String heading = getHTMLLinksText(
        "http://" + infoAddr + "/missingblocks.jsp",
        "WARNING Corrupt files:");
    String table = generateTable(corruptFilesCounterMap, infoAddr,
        raidNode.getNumDetectionsPerSec(), raidNode);
    out.print(heading + table);
  } catch (Exception e) {
    LOG.error("Get exception in generateWarningText", e);
  }
}
/**
 * Prints the block-fix records of one monitored directory that have the
 * given recovery time, sorted by their natural order. Prints nothing when
 * the directory has no recovery-time histogram.
 *
 * @param out          writer receiving the markup
 * @param monitorDir   monitored directory whose histogram is consulted
 * @param recoveryTime recovery time used to select the records
 * @param raidNode     node supplying the histograms
 */
public static void generateFilesContent(PrintWriter out,
    String monitorDir, long recoveryTime, RaidNode raidNode) {
  RaidHistogram histogram = raidNode.getBlockIntegrityMonitor()
      .getRecoveryTimes().get(monitorDir);
  if (histogram != null) {
    ArrayList<Point> matching =
        histogram.getPointsWithGivenRecoveryTime(recoveryTime);
    Collections.sort(matching);
    generateBlockFixJob(out, matching, raidNode);
  }
}
/**
 * Looks up the tracking URL of the job that owns the given task attempt.
 *
 * @param taskId   task attempt id in the textual form parsed by
 *                 {@code TaskAttemptID.forName}
 * @param raidNode node whose corruption monitor (cast to {@code Worker})
 *                 is asked for the URL
 * @return the tracking URL reported by the worker for the attempt's job
 */
public static String getTrackingUrl(String taskId, RaidNode raidNode) {
  JobID owningJob = TaskAttemptID.forName(taskId).getJobID();
  Worker worker =
      (Worker) raidNode.blockIntegrityMonitor.getCorruptionMonitor();
  return worker.getTrackingUrl(owningJob);
}
/**
 * Prints the number of records followed by a table with one row per record:
 * time since its last update, file path, and a link to the fixing job
 * ("N/A" when the record has no task id).
 *
 * @param out      writer receiving the markup
 * @param points   records to list, in the order given
 * @param raidNode node used to resolve job tracking URLs
 */
public static void generateBlockFixJob(PrintWriter out, ArrayList<Point> points,
    RaidNode raidNode) {
  out.println(points.size() + " records in total<br>");

  StringBuilder rows = new StringBuilder();
  String headerCells = JspUtils.td("Time Since <br> Update")
      + JspUtils.td("Path")
      + JspUtils.td("Job");
  rows.append(JspUtils.tr(headerCells));

  for (Point point : points) {
    String jobCell = (point.taskId == null)
        ? "N/A"
        : JspUtils.link(point.taskId, getTrackingUrl(point.taskId, raidNode));
    String cells = JspUtils.td(getTimeToNow(point.time))
        + JspUtils.td(point.path)
        + JspUtils.td(jobCell);
    rows.append(JspUtils.tr(cells));
  }
  out.println(JspUtils.table(rows.toString()));
}
/**
 * Describes how long ago a timestamp was, as "H hrs M mins".
 *
 * @param detecTime timestamp in milliseconds since the epoch
 * @return "now" when {@code detecTime} is not positive; otherwise the elapsed
 *         whole hours and remaining whole minutes up to the current time
 */
public static String getTimeToNow(long detecTime) {
  if (detecTime <= 0L) {
    return "now";
  }
  long elapsedMinutes = (System.currentTimeMillis() - detecTime) / 60000;
  return (elapsedMinutes / 60) + " hrs " + (elapsedMinutes % 60) + " mins";
}
/**
 * Orders {@link CorruptFile} entries for the corrupt-file listing by one of
 * three fields (time since the block went missing, path, or number of
 * corrupt blocks), ascending or descending. Ties on the time and
 * corrupt-block fields are broken by path.
 */
public static class CorruptFileComapare implements Comparator<CorruptFile> {
  public static final int
      FIELD_TIME_SINCE_BLOCK_MISSING = 1,
      FIELD_PATH = 2,
      FIELD_NUM_CORRUPT_BLOCKS = 3,
      SORT_ORDER_ASC = 1,
      SORT_ORDER_DSC = 2;
  int sortField = FIELD_TIME_SINCE_BLOCK_MISSING;
  int sortOrder = SORT_ORDER_DSC;

  /**
   * @param field "timesinceblockmissing", "path" or "numcorruptblocks";
   *              any other value (including null) keeps the default field,
   *              time since block missing
   * @param order "DSC" for descending, any other non-null value for
   *              ascending; null keeps the default, descending
   */
  public CorruptFileComapare(String field, String order) {
    if ("timesinceblockmissing".equals(field)) {
      sortField = FIELD_TIME_SINCE_BLOCK_MISSING;
    } else if ("path".equals(field)) {
      sortField = FIELD_PATH;
    } else if ("numcorruptblocks".equals(field)) {
      sortField = FIELD_NUM_CORRUPT_BLOCKS;
    }
    if (order != null) {
      sortOrder = order.equals("DSC") ? SORT_ORDER_DSC : SORT_ORDER_ASC;
    }
  }

  /**
   * Compares two corrupt files on the configured field and direction.
   * Null entries are ordered after non-null ones in ascending order, and
   * two nulls compare as equal.
   */
  public int compare(CorruptFile c1, CorruptFile c2) {
    int ret = 0;
    if (c1 == null || c2 == null) {
      if (c1 != c2) {
        ret = (c2 == null) ? -1 : 1;
      }
    } else {
      switch (sortField) {
        case FIELD_TIME_SINCE_BLOCK_MISSING:
          // A later detect time means a shorter time since the block went
          // missing, hence the swapped operands. The values are compared
          // rather than subtracted because a long difference cast to int
          // flips sign once the gap exceeds Integer.MAX_VALUE milliseconds.
          ret = compareValues(c2.detectTime, c1.detectTime);
          if (ret == 0) {
            ret = c1.path.compareTo(c2.path);
          }
          break;
        case FIELD_PATH:
          ret = c1.path.compareTo(c2.path);
          break;
        case FIELD_NUM_CORRUPT_BLOCKS:
          // Compared rather than subtracted to avoid int overflow.
          ret = compareValues(c1.numCorrupt, c2.numCorrupt);
          if (ret == 0) {
            ret = c1.path.compareTo(c2.path);
          }
          break;
        default:
          break;
      }
    }
    return (sortOrder == SORT_ORDER_DSC) ? -ret : ret;
  }

  /** Overflow-free three-way comparison returning -1, 0 or 1. */
  private static int compareValues(long left, long right) {
    if (left < right) {
      return -1;
    }
    return (left > right) ? 1 : 0;
  }
}
/**
 * Builds the attribute text for one sortable column header: a CSS class plus
 * an {@code onClick} handler that reloads the listing sorted on that column.
 *
 * When this column is the one currently sorted on, the class gets an "ASC"
 * or "DSC" suffix and the link toggles the direction; otherwise the link
 * requests ascending order. Any {@code sortOrder} other than "DSC" is
 * treated as ascending, so only the two fixed suffixes can reach the markup.
 *
 * {@code name}, {@code monitorDir} and {@code status} are URL-encoded before
 * they are placed in the link, because they may come from request parameters
 * and end up inside a quoted script string in an HTML attribute.
 *
 * @param name       sort key of this column
 * @param monitorDir monitored directory passed on as the {@code root} parameter
 * @param status     corrupt-file status passed on as the {@code status} parameter
 * @param sortField  sort key currently in effect (may be null)
 * @param sortOrder  sort direction currently in effect (may be null)
 * @return attribute text to be placed inside a {@code <th ...>} tag
 */
public static String NodeHeaderStr(String name, String monitorDir,
    String status, String sortField, String sortOrder) {
  StringBuilder attrs = new StringBuilder("class=header");
  String nextOrder = "ASC";
  if (name.equals(sortField)) {
    boolean descending = "DSC".equals(sortOrder);
    attrs.append(descending ? "DSC" : "ASC");
    if (!descending) {
      nextOrder = "DSC";
    }
  }
  attrs.append(" onClick=\"window.document.location=")
      .append("'corruptfilecounter?root=").append(encodeQueryValue(monitorDir))
      .append("&status=").append(encodeQueryValue(status))
      .append("&sorter/field=").append(encodeQueryValue(name))
      .append("&sorter/order=").append(nextOrder)
      .append("'\" title=\"sort on this column\"");
  return attrs.toString();
}

/** URL-encodes a query parameter value as UTF-8; null is encoded as "null". */
private static String encodeQueryValue(String value) {
  try {
    return URLEncoder.encode(String.valueOf(value), "UTF-8");
  } catch (UnsupportedEncodingException e) {
    // Every Java platform is required to support UTF-8.
    throw new IllegalStateException("UTF-8 encoding is not supported", e);
  }
}
/**
 * Prints a sortable table of the corrupt files under one monitored directory
 * that have the given status. Prints nothing when {@code status} is not the
 * name of a {@code CorruptFileStatus} constant.
 *
 * @param out        writer receiving the markup
 * @param monitorDir monitored directory whose files are listed
 * @param status     name of the corrupt-file status to list
 * @param raidNode   node supplying the corrupt file list
 * @param field      sort key; null means "timesinceblockmissing"
 * @param order      sort direction; null means "DSC"
 */
public static void generateFileStatus(PrintWriter out, String monitorDir,
    String status, RaidNode raidNode, String field, String order) {
  // Resolve the status name to its enum constant, if there is one.
  CorruptFileStatus wanted = null;
  CorruptFileStatus[] known = CorruptFileStatus.values();
  for (int i = 0; i < known.length && wanted == null; i++) {
    if (known[i].name().equals(status)) {
      wanted = known[i];
    }
  }
  if (wanted == null) {
    return;
  }

  ArrayList<CorruptFile> files = raidNode.getCorruptFileList(monitorDir, wanted);
  String sortBy = (field != null) ? field : "timesinceblockmissing";
  String sortDir = (order != null) ? order : "DSC";
  Collections.sort(files, new CorruptFileComapare(sortBy, sortDir));

  out.println("<style> th:hover{text-decoration:underline;cursor:hand;cursor:pointer;}</style>");
  out.println( "<div id=\"dfsnodetable\"> ");
  out.println("<br> <a name=\"CorruptFiles\" id=\"title\"> " +
      files.size() + " files in total" + "</a><br><br>");

  StringBuilder rows = new StringBuilder();
  // Header row: each cell carries the click-to-sort attributes.
  rows.append("<tr class=\"headerRow\"> <th ");
  rows.append(NodeHeaderStr("timesinceblockmissing", monitorDir, status, sortBy, sortDir));
  rows.append("> Time Since<br>Block Missing <th ");
  rows.append(NodeHeaderStr("numcorruptblocks", monitorDir, status, sortBy, sortDir));
  rows.append("> Number Of <br> Corrupt Blocks <th ");
  rows.append(NodeHeaderStr("path", monitorDir, status, sortBy, sortDir));
  rows.append("> Path \n");

  for (CorruptFile file : files) {
    String cells =
        JspUtils.tdWithClass(getTimeToNow(file.detectTime), "timesinceblockmissing")
        + JspUtils.tdWithClass(Integer.toString(file.numCorrupt), "numcorruptblocks")
        + JspUtils.tdWithClass(file.path, "path");
    rows.append(JspUtils.tr(cells));
  }
  out.println(JspUtils.table(rows.toString()));
  out.println("</div>");
}
/**
 * Handles a GET request by dispatching on its parameters:
 * <ul>
 * <li>{@code path} set: prints the sum of the RAID_UNRECOVERABLE and
 *     NOT_RAIDED_UNRECOVERABLE counters of that directory (a missing counter
 *     counts as 0); prints nothing when the directory is unknown.</li>
 * <li>{@code root} and {@code status} set: prints the listing of corrupt
 *     files with that status, honouring {@code sorter/field} and
 *     {@code sorter/order}.</li>
 * <li>{@code root} and {@code recoverytime} set: prints the block-fix records
 *     with that recovery time. A value that is not a valid long is answered
 *     with HTTP 400.</li>
 * <li>otherwise: prints the warning summary table.</li>
 * </ul>
 *
 * @param request  the incoming request
 * @param response the response whose writer receives the output
 * @throws ServletException declared by the servlet contract
 * @throws IOException if the response cannot be written
 */
public void doGet(HttpServletRequest request, HttpServletResponse response)
    throws ServletException, IOException {
  ServletContext context = getServletContext();
  RaidNode raidNode = (RaidNode) context.getAttribute("raidnode");
  PrintWriter out = response.getWriter();
  Map<String, Map<CorruptFileStatus, Long>> corruptFilesCounterMap = raidNode
      .getCorruptFilesCounterMap();
  String path = request.getParameter(CORRUPT_DIR_KEY);
  String sorterField = request.getParameter("sorter/field");
  String sorterOrder = request.getParameter("sorter/order");
  if (path == null || path.length() == 0) {
    String monitorDir = request.getParameter("root");
    if (monitorDir == null || monitorDir.length() == 0) {
      generateWarningText(out, corruptFilesCounterMap, raidNode);
    } else {
      String status = request.getParameter("status");
      if (status == null || status.length() == 0) {
        String recoveryTime = request.getParameter("recoverytime");
        if (recoveryTime == null || recoveryTime.length() == 0) {
          generateWarningText(out, corruptFilesCounterMap, raidNode);
        } else {
          long parsedRecoveryTime;
          try {
            parsedRecoveryTime = Long.parseLong(recoveryTime);
          } catch (NumberFormatException e) {
            // Client error: report it as such instead of failing the request
            // with an uncaught exception.
            response.sendError(HttpServletResponse.SC_BAD_REQUEST,
                "recoverytime must be an integer");
            return;
          }
          generateFilesContent(out, monitorDir, parsedRecoveryTime, raidNode);
        }
      } else {
        generateFileStatus(out, monitorDir, status, raidNode,
            sorterField, sorterOrder);
      }
    }
  } else {
    Map<CorruptFileStatus, Long> counters = corruptFilesCounterMap.get(path);
    if (counters != null) {
      // Either counter may be absent; unboxing a null Long would throw, so
      // a missing counter is treated as zero.
      Long raidUnrecoverable =
          counters.get(CorruptFileStatus.RAID_UNRECOVERABLE);
      Long notRaidedUnrecoverable =
          counters.get(CorruptFileStatus.NOT_RAIDED_UNRECOVERABLE);
      long total =
          (raidUnrecoverable == null ? 0L : raidUnrecoverable.longValue())
          + (notRaidedUnrecoverable == null
              ? 0L : notRaidedUnrecoverable.longValue());
      out.println(total);
    }
  }
  InjectionHandler.processEventIO(InjectionEvent.RAID_HTTPSERVER_TIMEOUT);
}
}