package org.archive.hadoop.util; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStream; import java.net.URI; import java.net.URISyntaxException; import java.nio.charset.Charset; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; public class HDFSeeko implements Tool { public static final String TOOL_NAME = "hdfs-fseeko"; public static final String TOOL_DESCRIPTION = "tool which outputs ranges of files in HDFS"; private Configuration conf; private static int maxRead = 1024 * 4; byte buffer[]; public HDFSeeko() { buffer = new byte[maxRead]; } public void setConf(Configuration conf) { this.conf = conf; } public Configuration getConf() { return conf; } private static int USAGE(int code) { System.err.println("USAGE"); System.err.println(TOOL_NAME + " HDFS_URL [OFFSET] [LENGTH]"); System.err.println("\tdump to STDOUT the contents of HDFS_URL"); System.err.println("\tif additional arguments are provided the first is the offset where dumping begins"); System.err.println("\tif a third option is specified, output up to LENGTH bytes, otherwise dump to EOF"); System.err.println(""); System.err.println("\tif no arguments are given, lines are read from STDIN."); System.err.println("\teach line is 1-3 SPACE separated fields, with semantics for the fields"); System.err.println("\tidentical to the command line arguments. Inputs lines may contain different numbers of fields."); return code; } public int run(String[] args) throws Exception { OutputStream out = System.out; if(args.length == 0) { Charset UTF8 = Charset.forName("UTF-8"); InputStreamReader isr = new InputStreamReader(System.in,UTF8); BufferedReader br = new BufferedReader(isr); String line; while(true) { line = br.readLine(); if(line == null) { break; } String parts[] = line.split(" "); try { dump(out,parts); } catch(NumberFormatException e) { throw new IOException("Bad input line:" + line,e); } catch(IllegalArgumentException e) { throw new IOException("Bad input line:" + line,e); } } } else { try { dump(out,args); } catch(NumberFormatException e) { return USAGE(1); } catch(IllegalArgumentException e) { return USAGE(1); } } return 0; } private void dump(OutputStream out, String args[]) throws IOException, NumberFormatException, IllegalArgumentException, URISyntaxException { String url = args[0]; long offset = 0; long length = -1; if(args.length > 1) { offset = Long.parseLong(args[1]); if(args.length > 2) { length = Long.parseLong(args[2]); if(args.length > 3) { throw new IllegalArgumentException(); } } } dump(out,url,offset,length); } private void dump(OutputStream out, String url, long offset, long length) throws URISyntaxException, IOException { URI uri = new URI(url); FileSystem fs = FileSystem.get(uri, getConf()); Path path = new Path(url); FSDataInputStream fsdis = fs.open(path); fsdis.seek(offset); if(length == -1) { // dump till EOF: while(true) { int amt = fsdis.read(buffer); if(amt == -1) { break; } out.write(buffer,0,amt); } } else { long totalRead = 0; while(length > 0) { int amtToRead = (int) Math.min(maxRead, length); int amtRead = fsdis.read(buffer,0,amtToRead); if(amtRead == -1) { throw new IOException(String.format("Got EOF after (%d) bytes. (%d) left\n", totalRead,length)); } length -= amtRead; totalRead += amtRead; out.write(buffer,0,amtRead); } } } public static void main(String[] args) throws Exception { int res = ToolRunner.run(new Configuration(), new HDFSeeko(), args); System.exit(res); } }