package org.apache.pig.piggybank.storage.hiverc;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.RCFile;
import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile.Metadata;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.ReflectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
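/**
 * An OutputFormat that writes Hive RCFile (Record Columnar File) output.
 * The number of columns must be set on the job configuration via
 * {@link #setColumnNumber(Configuration, int)} before any records are written.
 * <p>
 * A minimal driver sketch (illustrative only; the path and job name are
 * placeholders, a three-column schema is assumed, and {@code Job.getInstance}
 * assumes the Hadoop 2 API):
 *
 * <pre>{@code
 * Job job = Job.getInstance(new Configuration(), "rcfile-store");
 * HiveRCOutputFormat.setColumnNumber(job.getConfiguration(), 3);
 * // optional: force a compression codec for the output files
 * job.getConfiguration().set(HiveRCOutputFormat.COMPRESSION_CODEC_CONF,
 *         GzipCodec.class.getName());
 * job.setOutputFormatClass(HiveRCOutputFormat.class);
 * FileOutputFormat.setOutputPath(job, new Path("/tmp/rcfile-out"));
 * }</pre>
 */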
public class HiveRCOutputFormat extends FileOutputFormat<NullWritable, Writable> {
    private static final Logger LOG = LoggerFactory.getLogger(HiveRCOutputFormat.class);
    public static final String COMPRESSION_CODEC_CONF = "rcfile.output.compression.codec";
    public static final String DEFAULT_EXTENSION = ".rc";
    public static final String EXTENSION_OVERRIDE_CONF = "rcfile.output.filename.extension"; // "none" disables the extension.
    /**
     * Sets the number of columns in the given configuration (stored under
     * {@link RCFile#COLUMN_NUMBER_CONF_STR}).
     *
     * @param conf
     *            configuration instance on which to set the column number
     * @param columnNum
     *            number of columns for RCFile's Writer
     */
public static void setColumnNumber(Configuration conf, int columnNum) {
assert columnNum > 0;
conf.setInt(RCFile.COLUMN_NUMBER_CONF_STR, columnNum);
}
    /**
     * Returns the number of columns set in the given configuration for writers.
     *
     * @param conf
     *            configuration to read the column number from
     * @return number of columns for RCFile's Writer, or 0 if unset
     */
public static int getColumnNumber(Configuration conf) {
return conf.getInt(RCFile.COLUMN_NUMBER_CONF_STR, 0);
}
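    /**
     * Creates the underlying RCFile writer for this task, honoring the
     * compression and extension settings in the job configuration.
     *
     * @param job
     *            task context supplying the configuration and work path
     * @param columnMetadata
     *            column metadata for the file (see note below on current use)
     * @return a configured RCFile.Writer
     * @throws IOException
     */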
protected RCFile.Writer createRCFileWriter(TaskAttemptContext job,
Text columnMetadata)
throws IOException {
Configuration conf = job.getConfiguration();
// override compression codec if set.
String codecOverride = conf.get(COMPRESSION_CODEC_CONF);
if (codecOverride != null) {
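            // These are the legacy pre-Hadoop-2 "mapred.*" property names; newer
            // Hadoop versions map them to the mapreduce.output.fileoutputformat.*
            // equivalents, so getCompressOutput(job) below still observes them.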
conf.setBoolean("mapred.output.compress", true);
conf.set("mapred.output.compression.codec", codecOverride);
}
CompressionCodec codec = null;
if (getCompressOutput(job)) {
Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job, GzipCodec.class);
codec = ReflectionUtils.newInstance(codecClass, conf);
}
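        // No SequenceFile metadata is written yet; the columnMetadata argument
        // is accepted but currently unused.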
Metadata metadata = null;
String ext = conf.get(EXTENSION_OVERRIDE_CONF, DEFAULT_EXTENSION);
Path file = getDefaultWorkFile(job, ext.equalsIgnoreCase("none") ? null : ext);
        LOG.info("Writing to RCFile {}", file);
return new RCFile.Writer(file.getFileSystem(conf), conf, file, job, metadata, codec);
}
/**
* RecordWriter wrapper around an RCFile.Writer
*/
    protected static class Writer extends RecordWriter<NullWritable, Writable> {
private final RCFile.Writer rcfile;
protected Writer(HiveRCOutputFormat outputFormat,
TaskAttemptContext job,
Text columnMetadata) throws IOException {
rcfile = outputFormat.createRCFileWriter(job, columnMetadata);
}
@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
rcfile.close();
}
@Override
public void write(NullWritable key, Writable value) throws IOException, InterruptedException {
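            // The key is ignored; an RCFile row consists only of the value.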
rcfile.append(value);
}
}
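    /**
     * Returns a RecordWriter that appends each value to a new RCFile in the
     * task's work directory; keys are ignored.
     */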
@Override
public RecordWriter<NullWritable, Writable> getRecordWriter(
TaskAttemptContext job) throws IOException, InterruptedException {
return new Writer(this, job, null);
}
}