/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.apache.avro.mapreduce;
import static org.easymock.EasyMock.*;
import static org.junit.Assert.*;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileConstants;
import org.apache.avro.generic.GenericData;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapred.AvroOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.easymock.Capture;
import org.easymock.EasyMock;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
/**
 * Unit tests for {@link AvroKeyOutputFormat}, verifying that {@code getRecordWriter}
 * constructs its writer with the codec and sync interval implied by the job
 * configuration (both Avro-specific keys and legacy Hadoop compression keys).
 */
public class TestAvroKeyOutputFormat {

  /** Avro configuration key controlling the container-file sync interval. */
  private static final String SYNC_INTERVAL_KEY = AvroOutputFormat.SYNC_INTERVAL_KEY;

  /** A deliberately non-default sync interval, so tests can detect that it was honored. */
  private static final int TEST_SYNC_INTERVAL = 12345;

  @Rule
  public TemporaryFolder mTempDir = new TemporaryFolder();

  @Test
  public void testWithNullCodec() throws IOException {
    Configuration conf = new Configuration();
    conf.setInt(SYNC_INTERVAL_KEY, TEST_SYNC_INTERVAL);
    testGetRecordWriter(conf, CodecFactory.nullCodec(), TEST_SYNC_INTERVAL);
  }

  @Test
  public void testWithDeflateCodec() throws IOException {
    Configuration conf = new Configuration();
    conf.setBoolean("mapred.output.compress", true);
    conf.setInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, 3);
    // No sync interval set: the default should be used.
    testGetRecordWriter(conf, CodecFactory.deflateCodec(3), DataFileConstants.DEFAULT_SYNC_INTERVAL);
  }

  @Test
  public void testWithSnappyCode() throws IOException {
    Configuration conf = new Configuration();
    conf.setBoolean("mapred.output.compress", true);
    conf.set(AvroJob.CONF_OUTPUT_CODEC, DataFileConstants.SNAPPY_CODEC);
    conf.setInt(SYNC_INTERVAL_KEY, TEST_SYNC_INTERVAL);
    testGetRecordWriter(conf, CodecFactory.snappyCodec(), TEST_SYNC_INTERVAL);
  }

  @Test
  public void testWithBZip2Code() throws IOException {
    Configuration conf = new Configuration();
    conf.setBoolean("mapred.output.compress", true);
    conf.set(AvroJob.CONF_OUTPUT_CODEC, DataFileConstants.BZIP2_CODEC);
    testGetRecordWriter(conf, CodecFactory.bzip2Codec(), DataFileConstants.DEFAULT_SYNC_INTERVAL);
  }

  @Test
  public void testWithDeflateCodeWithHadoopConfig() throws IOException {
    // The legacy Hadoop codec class name should map onto Avro's deflate codec.
    Configuration conf = new Configuration();
    conf.setBoolean("mapred.output.compress", true);
    conf.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.DeflateCodec");
    conf.setInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, -1);
    conf.setInt(SYNC_INTERVAL_KEY, TEST_SYNC_INTERVAL);
    testGetRecordWriter(conf, CodecFactory.deflateCodec(-1), TEST_SYNC_INTERVAL);
  }

  @Test
  public void testWithSnappyCodeWithHadoopConfig() throws IOException {
    Configuration conf = new Configuration();
    conf.setBoolean("mapred.output.compress", true);
    conf.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec");
    testGetRecordWriter(conf, CodecFactory.snappyCodec(), DataFileConstants.DEFAULT_SYNC_INTERVAL);
  }

  @Test
  public void testWithBZip2CodeWithHadoopConfig() throws IOException {
    Configuration conf = new Configuration();
    conf.setBoolean("mapred.output.compress", true);
    conf.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.BZip2Codec");
    conf.setInt(SYNC_INTERVAL_KEY, TEST_SYNC_INTERVAL);
    testGetRecordWriter(conf, CodecFactory.bzip2Codec(), TEST_SYNC_INTERVAL);
  }

  /**
   * Asserts that the record writer is constructed and returned correctly from the
   * output format.
   *
   * @param conf the job configuration under test.
   * @param expectedCodec the codec the factory is expected to receive (compared via
   *     {@code toString()}, since {@link CodecFactory} does not define {@code equals}).
   * @param expectedSyncInterval the sync interval the factory is expected to receive.
   * @throws IOException if the output format fails to create the writer.
   */
  private void testGetRecordWriter(Configuration conf, CodecFactory expectedCodec, int expectedSyncInterval)
      throws IOException {
    // Configure a mock task attempt context.
    // Job.getInstance() replaces the Job(Configuration) constructor deprecated in Hadoop 2.x.
    Job job = Job.getInstance(conf);
    job.getConfiguration().set("mapred.output.dir", mTempDir.getRoot().getPath());
    Schema writerSchema = Schema.create(Schema.Type.INT);
    AvroJob.setOutputKeySchema(job, writerSchema);

    TaskAttemptContext context = createMock(TaskAttemptContext.class);
    expect(context.getConfiguration())
        .andReturn(job.getConfiguration()).anyTimes();
    expect(context.getTaskAttemptID())
        .andReturn(TaskAttemptID.forName("attempt_200707121733_0001_m_000000_0"))
        .anyTimes();
    expect(context.getNumReduceTasks()).andReturn(1);

    // Create a mock record writer.
    @SuppressWarnings("unchecked")
    RecordWriter<AvroKey<Integer>, NullWritable> expectedRecordWriter
        = createMock(RecordWriter.class);
    AvroKeyOutputFormat.RecordWriterFactory recordWriterFactory
        = createMock(AvroKeyOutputFormat.RecordWriterFactory.class);

    // Expect the record writer factory to be called with appropriate parameters.
    Capture<CodecFactory> capturedCodecFactory = new Capture<>();
    expect(recordWriterFactory.create(eq(writerSchema),
        anyObject(GenericData.class),
        capture(capturedCodecFactory), // Capture for comparison later.
        anyObject(OutputStream.class),
        eq(expectedSyncInterval))).andReturn(expectedRecordWriter);

    replay(context, expectedRecordWriter, recordWriterFactory);

    AvroKeyOutputFormat<Integer> outputFormat
        = new AvroKeyOutputFormat<Integer>(recordWriterFactory);
    RecordWriter<AvroKey<Integer>, NullWritable> recordWriter
        = outputFormat.getRecordWriter(context);

    // Make sure the expected codec was used.
    assertTrue(capturedCodecFactory.hasCaptured());
    assertEquals(expectedCodec.toString(), capturedCodecFactory.getValue().toString());

    verify(context, expectedRecordWriter, recordWriterFactory);

    assertNotNull(recordWriter);
    // assertSame reports both references on failure, unlike assertTrue(a == b).
    assertSame(expectedRecordWriter, recordWriter);
  }
}