package org.archive.hadoop.mapreduce; import java.io.DataOutputStream; import java.io.IOException; import java.nio.charset.Charset; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.RecordWriter; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.archive.format.gzip.zipnum.ZipNumWriter; /** * Warning - this has a bug.. leaves empty SUMMARY files in some cases. * * @author brad * */ public class ZipNumRecordWriter extends RecordWriter<Text, Text>{ protected ZipNumWriter znw; public static char DEFAULT_DELIM = ' '; public static char DEFAULT_NL = 10; public char delim = DEFAULT_DELIM; private final static Charset UTF8 = Charset.forName("utf-8"); public ZipNumRecordWriter(int limit, DataOutputStream outMain, DataOutputStream outSummary) { znw = new ZipNumWriter(outMain, outSummary, limit); } @Override public void close(TaskAttemptContext arg0) throws IOException, InterruptedException { znw.close(); } @Override public void write(Text key, Text val) throws IOException, InterruptedException { StringBuilder sb = new StringBuilder(); sb.append(key.toString()); sb.append(delim); sb.append(val.toString()); sb.append(DEFAULT_NL); write(sb.toString().getBytes(UTF8)); } public void write(byte[] bytes) throws IOException { znw.addRecord(bytes); } /** * @return the delim */ public char getDelim() { return delim; } /** * @param delim the delim to set */ public void setDelim(char delim) { this.delim = delim; } }