package org.archive.hadoop.util; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStreamReader; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.archive.util.StreamCopy; public class HDFSSync implements Tool { public final static String TOOL_NAME = "hdfs-sync"; public static final String TOOL_DESCRIPTION = "A tool for copying files into and out of HDFS, in a semi-restartable fashion"; private Configuration conf; public void setConf(Configuration conf) { this.conf = conf; } public Configuration getConf() { return conf; } private static int USAGE(int code) { System.err.println("USAGE"); System.err.println(TOOL_NAME); System.err.println("\t\tread SRC TGT tuples from STDIN, where one of SRC or TGT"); System.err.println("\t\tis an hdfs:// URL, and the other is a local path"); System.err.println("\t\tWill only perform the copy if TGT does not already exist"); System.err.println("\t\tThis is NOT rsync - no comparison, checksumming, or even file length checking happens."); return code; } public static void main(String[] args) throws Exception { int res = ToolRunner.run(new Configuration(), new HDFSSync(), args); System.exit(res); } public int run(String[] args) throws IOException { if(args.length != 0) { return USAGE(1); } InputStreamReader isr = new InputStreamReader(System.in); BufferedReader br = new BufferedReader(isr); while(true) { String line = br.readLine(); if(line == null) { break; } String parts[] = line.split("\\s"); if(parts.length != 2) { System.err.println("Bad input line(" + line + ")"); return 1; } String src = parts[0]; String tgt = parts[1]; if(src.startsWith("hdfs://")) { if(syncFromHDFS(src,tgt)) { System.out.format("Copied\t%s\t%s\n", src,tgt); } } else if(tgt.startsWith("hdfs://")) { if(syncToHDFS(src,tgt)) { System.out.format("Copied\t%s\t%s\n", src,tgt); } } else { System.err.println("Bad input tgt not '/' (" + line + ")"); return 1; } } return 0; } private static boolean syncToHDFS(String src, String tgt) throws IOException { if(!tgt.startsWith("hdfs://")) { System.err.println("Bad input tgt not 'hdfs://' (" + src + "," + tgt + ")"); System.exit(1); } if(!src.startsWith("/")) { System.err.println("Bad input src not '/' (" + src + "," + tgt + ")"); System.exit(1); } File srcFile = new File(src); Path fsPath = new Path(tgt); Path fsPathTmp = new Path(tgt+".TMP"); FileSystem fs = fsPath.getFileSystem(new Configuration()); if(fs.isFile(fsPath)) { System.err.format("Target-Exists\t%s\t%s\n", src,tgt); return false; } if(!srcFile.isFile()) { System.err.println("Bad input src not a File (" + src + "," + tgt + ")"); System.exit(1); } FileInputStream fis = new FileInputStream(srcFile); FSDataOutputStream fsOut = fs.create(fsPathTmp, true); StreamCopy.copy(fis, fsOut); fis.close(); fsOut.close(); if(fs.rename(fsPathTmp, fsPath)) { return true; } System.err.format("FAILED to mv(%s) to (%s)\n", fsPathTmp.toUri().toASCIIString(), fsPath.toUri().toASCIIString()); return false; } private static boolean syncFromHDFS(String src, String tgt) throws IOException { if(!src.startsWith("hdfs://")) { System.err.println("Bad input src not 'hdfs://' (" + src + "," + tgt + ")"); System.exit(1); } if(!tgt.startsWith("/")) { System.err.println("Bad input tgt not '/' (" + src + "," + tgt + ")"); System.exit(1); } File tgtFile = new File(tgt); File tmpTgtFile = new File(tgt+".TMP"); if(tgtFile.exists()) { System.err.format("Target-Exists\t%s\t%s\n", src,tgt); return false; } if(tmpTgtFile.exists()) { if(!tmpTgtFile.delete()) { throw new IOException("FAILED to unlink(" + tmpTgtFile.getAbsolutePath()+")"); } } Path fsPath = new Path(src); FileSystem fs = fsPath.getFileSystem(new Configuration()); FSDataInputStream fsdis = null; try { fsdis = fs.open(fsPath); } catch (IOException e) { System.err.format("FAILD open(%s): (%s)\n",src,e.getMessage()); return false; } FileOutputStream fos = new FileOutputStream(tmpTgtFile); StreamCopy.copy(fsdis, fos); fos.close(); fsdis.close(); if(tmpTgtFile.renameTo(tgtFile)) { return true; } System.err.println("FAILED to mv("+tmpTgtFile.getAbsolutePath()+") to ("+tgtFile.getAbsolutePath()+")"); return false; } }