package org.commoncrawl.hadoop.mergeutils; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.Writer; import java.nio.charset.Charset; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; public class TextFileSpillWriter implements SpillWriter<WritableComparable,Writable> { Writer writer; // default buffer size public static final int DEFAULT_SPILL_BUFFER_SIZE = 1000000; // the size of our spill buffer public static final String SPILL_WRITER_BUFFER_SIZE_PARAM = "commoncrawl.spillwriter.buffer.size"; public TextFileSpillWriter(FileSystem fileSystem, Configuration conf, Path outputFilePath)throws IOException { writer = new OutputStreamWriter(fileSystem.create( outputFilePath, true, conf.getInt(SPILL_WRITER_BUFFER_SIZE_PARAM, DEFAULT_SPILL_BUFFER_SIZE) ),Charset.forName("UTF-8")); } @Override public void close() throws IOException { try { writer.flush(); } finally { IOUtils.closeStream(writer); } } @Override public void spillRecord(WritableComparable key, Writable value) throws IOException { if (!(key instanceof NullWritable)) { writer.write(key.toString()); if (!(value instanceof NullWritable)) { writer.write('\t'); } } if (!(value instanceof NullWritable)) { writer.write(value.toString()); } writer.write('\n'); } }