/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.tools;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.io.BufferedReader;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hdfs.protocol.FSConstants;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.NamenodeFsck;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.hdfs.DFSUtil;
/**
* This class provides rudimentary checking of DFS volumes for errors and
* sub-optimal conditions.
* <p>The tool scans all files and directories, starting from an indicated
* root path. The following abnormal conditions are detected and handled:</p>
* <ul>
* <li>files with blocks that are completely missing from all datanodes.<br/>
* In this case the tool can perform one of the following actions:
* <ul>
* <li>none ({@link org.apache.hadoop.hdfs.server.namenode.NamenodeFsck#FIXING_NONE})</li>
* <li>move corrupted files to /lost+found directory on DFS
 * ({@link org.apache.hadoop.hdfs.server.namenode.NamenodeFsck#FIXING_MOVE}). Remaining data blocks are saved as
 * block chains, representing the longest consecutive series of valid blocks.</li>
* <li>delete corrupted files ({@link org.apache.hadoop.hdfs.server.namenode.NamenodeFsck#FIXING_DELETE})</li>
* </ul>
* </li>
* <li>detect files with under-replicated or over-replicated blocks</li>
* </ul>
 * Additionally, the tool collects detailed overall DFS statistics, and
* optionally can print detailed statistics on block locations and replication
* factors of each file.
 * The tool also provides an option to filter open files during the scan.
*
*/
public class DFSck extends Configured implements Tool {

  static {
    // Pull in HDFS and Avatar configuration resources so that
    // nameservice- and avatar-specific keys resolve at startup.
    Configuration.addDefaultResource("hdfs-default.xml");
    Configuration.addDefaultResource("hdfs-site.xml");
    Configuration.addDefaultResource("avatar-default.xml");
    Configuration.addDefaultResource("avatar-site.xml");
  }

  /** Destination stream for the fsck report. */
  private final PrintStream out;

  DFSck() {
    this.out = System.out;
  }

  /**
   * Filesystem checker.
   * @param conf current Configuration
   * @throws Exception
   */
  public DFSck(Configuration conf) throws Exception {
    this(conf, System.out);
  }

  /**
   * Filesystem checker writing its report to the given stream.
   * @param conf current Configuration
   * @param out destination for the fsck report
   * @throws IOException
   */
  public DFSck(Configuration conf, PrintStream out) throws IOException {
    super(conf);
    this.out = out;
  }

  /**
   * Resolves the host:port of the namenode HTTP info server from the
   * configuration.
   */
  protected String getInfoServer() throws Exception {
    return NetUtils.getServerAddress(getConf(), "dfs.info.bindAddress",
        "dfs.info.port", "dfs.http.address");
  }

  /**
   * Print fsck usage information
   */
  static void printUsage() {
    // Note the trailing space after "serviceName]" so the options do not
    // run together in the printed usage line.
    System.err.println("Usage: DFSck <path> [-list-corruptfileblocks | " +
        "[-move | -delete | -openforwrite ] " +
        "[-files [-blocks [-locations | -racks]]]] " +
        "[-limit <limit>] [-service serviceName] " +
        "[-(zero/one)]");
    System.err.println("\t<path>\tstart checking from this path");
    System.err.println("\t-move\tmove corrupted files to /lost+found");
    System.err.println("\t-delete\tdelete corrupted files");
    System.err.println("\t-files\tprint out files being checked");
    System.err.println("\t-openforwrite\tprint out files opened for write");
    System.err.println("\t-list-corruptfileblocks\tprint out list of missing " +
        "blocks and files they belong to");
    System.err.println("\t-blocks\tprint out block report");
    System.err.println("\t-locations\tprint out locations for every block");
    System.err.println("\t-racks\tprint out network topology for data-node locations");
    System.err.println("\t-limit\tlimit output to <limit> corrupt files. " +
        "The default value of the limit is 500.");
    System.err.println("\t\tBy default fsck ignores files opened for write, " +
        "use -openforwrite to report such files. They are usually " +
        "tagged CORRUPT or HEALTHY depending on their block " +
        "allocation status");
    ToolRunner.printGenericCommandUsage(System.err);
  }

  /**
   * To get the list, we need to call iteratively until the server says
   * there is no more left.
   *
   * @param dir the path being checked (used only in the printed summary)
   * @param limit maximum number of corrupt files to list
   * @param baseUrl fsck servlet URL with listcorruptfileblocks already set
   * @return 0 if no corrupt files were found, -1 otherwise
   */
  private Integer listCorruptFileBlocks(String dir, int limit, String baseUrl)
      throws IOException {
    int errCode = -1;
    int numCorrupt = 0;
    int cookie = 0;
    String lastBlock = null;
    final String noCorruptLine = "has no CORRUPT files";
    final String noMoreCorruptLine = "has no more CORRUPT files";
    final String cookiePrefix = "Cookie:";
    boolean allDone = false;
    while (!allDone) {
      // Local builder needs no synchronization; StringBuilder suffices.
      final StringBuilder url = new StringBuilder(baseUrl);
      if (cookie > 0) {
        url.append("&startblockafterIndex=").append(String.valueOf(cookie));
      } else if (lastBlock != null) { // for backwards compatibility purpose
        url.append("&startblockafter=").append(lastBlock);
      }
      URL path = new URL(url.toString());
      URLConnection connection = path.openConnection();
      InputStream stream = connection.getInputStream();
      BufferedReader input = new BufferedReader(new InputStreamReader(stream,
          "UTF-8"));
      try {
        String line = null;
        while ((line = input.readLine()) != null) {
          // The cookie line tells us where to resume on the next request.
          if (line.startsWith(cookiePrefix)){
            try{
              cookie = Integer.parseInt(line.split("\t")[1]);
            } catch (Exception e){
              // Malformed cookie: stop iterating rather than loop forever.
              allDone = true;
              break;
            }
            continue;
          }
          // Terminal server messages, or hitting the output limit, end
          // the whole iteration.
          if ((line.endsWith(noCorruptLine)) ||
              (line.endsWith(noMoreCorruptLine)) ||
              (line.endsWith(NamenodeFsck.HEALTHY_STATUS)) ||
              (line.endsWith(NamenodeFsck.NONEXISTENT_STATUS)) ||
              numCorrupt >= limit) {
            allDone = true;
            break;
          }
          // Skip boilerplate lines surrounding the actual entries.
          if ((line.isEmpty())
              || (line.startsWith("FSCK started by"))
              || (line.startsWith("Unable to locate any corrupt files under"))
              || (line.startsWith("The filesystem under path")))
            continue;
          numCorrupt++;
          if (numCorrupt == 1) {
            out.println("The list of corrupt files under path '"
                + dir + "' are:");
          }
          out.println(line);
          try {
            // Get the block # that we need to send in next call
            lastBlock = line.split("\t")[0];
          } catch (Exception e) {
            // Unparseable entry: best effort, stop here.
            allDone = true;
            break;
          }
        }
      } finally {
        input.close();
      }
    }
    out.println("The filesystem under path '" + dir + "' has "
        + numCorrupt + " CORRUPT files");
    if (numCorrupt == 0)
      errCode = 0;
    return errCode;
  }

  /**
   * Runs fsck by querying the namenode's HTTP fsck servlet and relaying
   * its report to {@link #out}.
   *
   * @param args fsck command line arguments (path plus options)
   * @return 0 if the checked path is healthy or nonexistent, 1 if
   *         corruption was detected, -1 on usage or server errors
   */
  public int run(String[] args) throws Exception {
    try {
      args = DFSUtil.setGenericConf(args, getConf());
    } catch (IllegalArgumentException e) {
      System.err.println(e.getMessage());
      printUsage();
      return -1;
    }
    String fsName = getInfoServer();
    if (args.length == 0) {
      printUsage();
      return -1;
    }
    StringBuilder url = new StringBuilder("http://"+fsName+"/fsck?path=");
    String dir = "/";
    int limit = 500; // limit output.
    // find top-level dir first
    for (int idx = 0; idx < args.length; ) {
      if (args[idx].equals("-limit")) {
        idx++; // Skip over limit value
      } else if (!args[idx].startsWith("-")) {
        dir = args[idx];
        break;
      }
      idx++;
    }
    url.append(URLEncoder.encode(dir, "UTF-8"));
    boolean doListCorruptFileBlocks = false;
    // Translate each recognized option into a servlet query parameter.
    for (int idx = 0; idx < args.length; ) {
      if (args[idx].equals("-move")) { url.append("&move=1"); }
      else if (args[idx].equals("-delete")) { url.append("&delete=1"); }
      else if (args[idx].equals("-files")) { url.append("&files=1"); }
      else if (args[idx].equals("-openforwrite")) { url.append("&openforwrite=1"); }
      else if (args[idx].equals("-blocks")) { url.append("&blocks=1"); }
      else if (args[idx].equals("-locations")) { url.append("&locations=1"); }
      else if (args[idx].equals("-racks")) { url.append("&racks=1"); }
      else if (args[idx].equals("-list-corruptfileblocks")) {
        url.append("&listcorruptfileblocks=1");
        doListCorruptFileBlocks = true;
      } else if (args[idx].equals("-limit")) {
        idx++;
        limit = Integer.parseInt(args[idx]);
      } else if (args[idx].equals("-list-decommissioningblocks")) {
        url.append("&decommissioning=1");
      }
      idx++;
    }
    if (doListCorruptFileBlocks) {
      return listCorruptFileBlocks(dir, limit, url.toString());
    }
    URL path = new URL(url.toString());
    System.err.println("Connecting to : " + path);
    URLConnection connection = path.openConnection();
    InputStream stream = connection.getInputStream();
    BufferedReader input = new BufferedReader(new InputStreamReader(
        stream, "UTF-8"));
    String line = null;
    String lastLine = null;
    int errCode = -1;
    try {
      while ((line = input.readLine()) != null) {
        out.println(line);
        lastLine = line;
      }
    } finally {
      input.close();
    }
    // The server may return an empty body; without this guard the
    // endsWith() calls below would throw a NullPointerException.
    if (lastLine == null) {
      return errCode;
    }
    if (lastLine.endsWith(NamenodeFsck.HEALTHY_STATUS)) {
      errCode = 0;
    } else if (lastLine.endsWith(NamenodeFsck.CORRUPT_STATUS)) {
      errCode = 1;
    } else if (lastLine.endsWith(NamenodeFsck.NONEXISTENT_STATUS)) {
      errCode = 0;
    }
    return errCode;
  }

  /**
   * Adjusts configuration for nameservice keys. Also uses avatar-aware trick,
   * so we can use fsck without ZK, also during failover, manually by specifying
   * zero/one option.
   */
  private static String[] adjustConf(String[] argv, Configuration conf) {
    String[] serviceId = new String[] { "" };
    String[] filteredArgv = DFSUtil.getServiceName(argv, serviceId);
    if (!serviceId[0].equals("")) {
      NameNode.checkServiceName(conf, serviceId[0]);
      DFSUtil.setGenericConf(conf, serviceId[0],
          NameNode.NAMESERVICE_SPECIFIC_KEYS);
      NameNode.setupDefaultURI(conf);
    }
    // make it avatar aware (manual option)
    if (optionExist(argv, "-one")) {
      updateConfKeys(conf, "1", serviceId[0]);
    } else {
      updateConfKeys(conf, "0", serviceId[0]);
    }
    return filteredArgv;
  }

  /**
   * Command-line entry point; exits with the fsck result code.
   */
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // service aware
    try {
      args = adjustConf(args, conf);
    } catch (IllegalArgumentException e) {
      System.err.println(e.getMessage());
      printUsage();
      System.exit(-1);
    }
    // -files option is also used by GenericOptionsParser
    // Make sure that is not the first argument for fsck
    int res = -1;
    if ((args.length == 0 ) || ("-files".equals(args[0])))
      printUsage();
    else
      res = ToolRunner.run(new DFSck(conf), args);
    System.exit(res);
  }

  /**
   * For federated and avatar clusters, we need update the http key.
   * Copies the suffixed (avatar instance 0/1, optionally per-nameservice)
   * http address key into the plain key, when such a value is configured.
   */
  private static void updateConfKeys(Configuration conf, String suffix,
      String nameserviceId) {
    String value = conf.get(FSConstants.DFS_NAMENODE_HTTP_ADDRESS_KEY + suffix
        + (nameserviceId.isEmpty() ? "" : ("." + nameserviceId)));
    if (value != null) {
      conf.set(FSConstants.DFS_NAMENODE_HTTP_ADDRESS_KEY, value);
    }
  }

  /**
   * Check if the option exist in the given arguments.
   * Comparison is case-insensitive.
   */
  private static boolean optionExist(String args[], String opt) {
    for (String arg : args) {
      if (arg.equalsIgnoreCase(opt)) {
        return true;
      }
    }
    return false;
  }
}