package org.archive.hadoop.cdx;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.Charset;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.archive.hadoop.mapreduce.CDXMapper;
import org.archive.hadoop.mapreduce.CDXMapper.StringPair;
public class CDXConverterTool implements Tool {
Charset UTF8 = Charset.forName("utf-8");
public final static String TOOL_NAME = "cdx-convert";
public static final String TOOL_DESCRIPTION =
"A tool for converting old CDX lines from STDIN to SURT form on STDOUT";
private Configuration conf;
public void setConf(Configuration conf) {
this.conf = conf;
}
public Configuration getConf() {
return conf;
}
public int run(String[] args) throws Exception {
CDXMapper mapper = new CDXMapper();
mapper.setConf(getConf());
BufferedReader br =
new BufferedReader(new InputStreamReader(System.in,UTF8));
PrintWriter pw =
new PrintWriter(new OutputStreamWriter(System.out,UTF8));
while(true) {
String cdxLine = br.readLine();
if(cdxLine == null) {
break;
}
StringPair pair = mapper.convert(cdxLine);
if(pair != null) {
pw.print(pair.first);
pw.print(" ");
pw.print(pair.second);
pw.println();
}
}
pw.flush();
return 0;
}
public static void main(String[] args) throws Exception {
int res = ToolRunner.run(new Configuration(), new CDXConverterTool(), args);
System.exit(res);
}
}