package org.archive.hadoop.cdx;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class HDFSLSR implements Tool {
Charset UTF8 = Charset.forName("utf-8");
public final static String TOOL_NAME = "hdfs-lsr";
public static final String TOOL_DESCRIPTION =
"A tool for producing lsr type output from HDFS to STDOUT";
private Configuration conf;
public void listPath(FileStatus status, FileSystem fs, PrintWriter target) throws IOException {
if(status.isDir()) {
//System.err.format("Recursing into %s\n", status.getPath().toUri().toASCIIString());
FileStatus entries[] = fs.listStatus(status.getPath());
for(FileStatus entry : entries) {
listPath(entry,fs,target);
}
} else {
Path path = status.getPath();
target.format("%s\t%s\n",
path.getName(), path.toUri().toASCIIString());
}
}
public void setConf(Configuration conf) {
this.conf = conf;
}
public Configuration getConf() {
return conf;
}
public static int USAGE(int code) {
System.err.println("Usage: " + TOOL_NAME + " HDFS_URL");
System.err.println("\tRecursively descend into HDFS_URL, producing one line");
System.err.println("\tto STDOUT for each FILE found. Lines are of the format:");
System.err.println("\t\tBASENAME<tab>PATH");
return code;
}
public int run(String[] args) throws Exception {
if(args.length != 1) {
return USAGE(1);
}
Path path = new Path(args[0]);
FileSystem fs = path.getFileSystem(getConf());
FileStatus status = fs.getFileStatus(path);
PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, UTF8));
listPath(status, fs, pw);
pw.flush();
return 0;
}
public static void main(String[] args) throws Exception {
int res = ToolRunner.run(new Configuration(), new HDFSLSR(), args);
System.exit(res);
}
}