package com.linkedin.camus.sweeper.utils; import java.io.IOException; import org.apache.avro.Schema; import org.apache.avro.generic.GenericData; import org.apache.avro.mapred.AvroKey; import org.apache.avro.mapreduce.AvroKeyOutputFormat; import org.apache.avro.mapreduce.AvroKeyRecordWriter; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapreduce.OutputCommitter; import org.apache.hadoop.mapreduce.RecordWriter; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import com.linkedin.camus.sweeper.mapreduce.CamusSweeperOutputCommitter; /** * The AvroKeyOutputFormat class to relax the name validation * * @author hcai * * @param <T> The Java type of the Avro data to serialize. */ public class RelaxedAvroKeyOutputFormat<T> extends AvroKeyOutputFormat<T> { private static final Log LOG = LogFactory.getLog(RelaxedAvroKeyOutputFormat.class.getName()); private static final String CONF_OUTPUT_KEY_SCHEMA = "avro.schema.output.key"; private FileOutputCommitter commiter = null; @Override public RecordWriter<AvroKey<T>, NullWritable> getRecordWriter(TaskAttemptContext context) throws IOException { LOG.info("getRecordWriter for" + context); // Get the writer schema. String schemaString = context.getConfiguration().get(CONF_OUTPUT_KEY_SCHEMA); Schema writerSchema = schemaString != null ? RelaxedSchemaUtils.parseSchema(schemaString, context.getConfiguration()) : null; if (null == writerSchema) { throw new IOException("AvroKeyOutputFormat requires an output schema. Use AvroJob.setOutputKeySchema()."); } return new AvroKeyRecordWriter<T>(writerSchema, GenericData.get(), getCompressionCodec(context), getAvroFileOutputStream(context)); } @Override public synchronized OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException { if (this.commiter == null) { this.commiter = new CamusSweeperOutputCommitter(FileOutputFormat.getOutputPath(context), context); } return this.commiter; } }