package org.archive.io; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.nio.charset.Charset; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionCodecFactory; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.archive.format.gzip.zipnum.ZipNumWriter; import org.archive.hadoop.mapreduce.ZipNumOutputFormat; import org.archive.hadoop.mapreduce.ZipNumRecordWriter; public class ZipNumWriterTool implements Tool { public final static String TOOL_NAME = "zipnum-writer"; public final static String TOOL_DESCRIPTION = "A command line tool for producing ZipNum output files"; private static final Charset UTF8 = Charset.forName("utf-8"); private Configuration conf; public void setConf(Configuration conf) { this.conf = conf; } public Configuration getConf() { return conf; } private static int USAGE(int code, String msg) { if(msg != null) { System.err.println(msg); } System.err.println("USAGE " + TOOL_NAME + " GZ SUMM LIMIT"); System.err.println("USAGE " + TOOL_NAME + " GZ SUM LIMIT INPUT"); System.err.println("Write ZipNum at GZ, Summary at SUM, with LIMIT lines per record"); System.err.println("If INPUT is specified, read lines from INPUT, otherwise from STDIN"); return code; } public int run(String args[]) throws IOException { int index = 0; int limit = ZipNumOutputFormat.getZipNumLineCount(conf); String input = null; String output = null; String summary = null; InputStreamReader inReader = null; ZipNumRecordWriter zipnumWriter = null; try { for (index = 0; index < args.length; index++) { if (args[index].equals("-l")) { limit = Integer.valueOf(args[++index]); continue; } if (args[index].equals("-s")) { summary = args[++index]; continue; } if (args[index].equals("-i")) { input = args[++index]; continue; } output = args[index]; } CompressionCodecFactory ccf = new CompressionCodecFactory(conf); CompressionCodec codec = ccf.getCodecByClassName(GzipCodec.class.getName()); Path mainFile = new Path(output); Path summaryFile = null; if (summary != null) { summaryFile = new Path(summary); } else { summaryFile = new Path(ZipNumOutputFormat.getSummaryExt(conf)); } FileSystem mainFs = mainFile.getFileSystem(conf); FileSystem summaryFs = summaryFile.getFileSystem(conf); if (input == null) { inReader = new InputStreamReader(System.in); } else { inReader = new InputStreamReader(mainFs.open(new Path(input))); } FSDataOutputStream mainOut = mainFs.create(mainFile, false); FSDataOutputStream summaryOut = summaryFs.create(summaryFile, false); zipnumWriter = new ZipNumRecordWriter(codec, mainOut, summaryOut, mainFile.getName(), limit); BufferedReader br = new BufferedReader(inReader); String line = null; Text empty = new Text(""); Text textLine = new Text(); while ((line = br.readLine()) != null) { textLine.set(line); zipnumWriter.write(textLine, empty); } } catch (RuntimeException re) { re.printStackTrace(); return USAGE(1,"Wrong number of arguments"); } finally { if (inReader != null) { inReader.close(); } if (zipnumWriter != null) { zipnumWriter.close(null); } } return 0; } public int runOld(String args[]) throws IOException { if((args.length < 3) || (args.length > 4)) { return USAGE(1,"Wrong number of arguments"); } InputStream in = System.in; int arg = 0; if(args.length == 4) { in = new FileInputStream(new File(args[arg++])); } File gz = new File(args[arg++]); File summ = new File(args[arg++]); int limit = Integer.valueOf(args[arg++]); ZipNumWriter znw = new ZipNumWriter(new FileOutputStream(gz,false), new FileOutputStream(summ,false), limit); InputStreamReader isr = new InputStreamReader(in,UTF8); BufferedReader br = new BufferedReader(isr); while(true) { String line = br.readLine(); if(line == null) { znw.close(); break; } line = line + "\n"; znw.addRecord(line.getBytes(UTF8)); } znw.close(); return 0; } public static void main(String[] args) throws Exception { int res = ToolRunner.run(new Configuration(), new ZipNumWriterTool(), args); System.exit(res); } }