/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. */ package org.apache.avro.mapreduce; import static org.easymock.EasyMock.createMock; import static org.easymock.EasyMock.replay; import static org.easymock.EasyMock.verify; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import org.apache.avro.Schema; import org.apache.avro.file.CodecFactory; import org.apache.avro.file.DataFileReader; import org.apache.avro.file.DataFileStream; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericRecord; import org.apache.avro.hadoop.io.AvroDatumConverter; import org.apache.avro.hadoop.io.AvroDatumConverterFactory; import org.apache.avro.hadoop.io.AvroKeyValue; import org.apache.avro.io.DatumReader; import org.apache.avro.mapred.AvroValue; import org.apache.avro.mapred.FsInput; import org.apache.avro.reflect.ReflectData; import org.apache.avro.reflect.ReflectDatumReader; import org.apache.avro.specific.SpecificDatumReader; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.junit.Test; public class TestAvroKeyValueRecordWriter { @Test public void testWriteRecords() throws IOException { Job job = new Job(); AvroJob.setOutputValueSchema(job, TextStats.SCHEMA$); TaskAttemptContext context = createMock(TaskAttemptContext.class); replay(context); AvroDatumConverterFactory factory = new AvroDatumConverterFactory(job.getConfiguration()); AvroDatumConverter<Text, ?> keyConverter = factory.create(Text.class); AvroValue<TextStats> avroValue = new AvroValue<TextStats>(null); @SuppressWarnings("unchecked") AvroDatumConverter<AvroValue<TextStats>, ?> valueConverter = factory.create((Class<AvroValue<TextStats>>) avroValue.getClass()); CodecFactory compressionCodec = CodecFactory.nullCodec(); ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); // Use a writer to generate a Avro container file in memory. // Write two records: <'apple', TextStats('apple')> and <'banana', TextStats('banana')>. AvroKeyValueRecordWriter<Text, AvroValue<TextStats>> writer = new AvroKeyValueRecordWriter<Text, AvroValue<TextStats>>(keyConverter, valueConverter, new ReflectData(), compressionCodec, outputStream); TextStats appleStats = new TextStats(); appleStats.name = "apple"; writer.write(new Text("apple"), new AvroValue<TextStats>(appleStats)); TextStats bananaStats = new TextStats(); bananaStats.name = "banana"; writer.write(new Text("banana"), new AvroValue<TextStats>(bananaStats)); writer.close(context); verify(context); ByteArrayInputStream inputStream = new ByteArrayInputStream(outputStream.toByteArray()); Schema readerSchema = AvroKeyValue.getSchema( Schema.create(Schema.Type.STRING), TextStats.SCHEMA$); DatumReader<GenericRecord> datumReader = new SpecificDatumReader<GenericRecord>(readerSchema); DataFileStream<GenericRecord> avroFileReader = new DataFileStream<GenericRecord>(inputStream, datumReader); // Verify that the first record was written. assertTrue(avroFileReader.hasNext()); AvroKeyValue<CharSequence, TextStats> firstRecord = new AvroKeyValue<CharSequence, TextStats>(avroFileReader.next()); assertNotNull(firstRecord.get()); assertEquals("apple", firstRecord.getKey().toString()); assertEquals("apple", firstRecord.getValue().name.toString()); // Verify that the second record was written; assertTrue(avroFileReader.hasNext()); AvroKeyValue<CharSequence, TextStats> secondRecord = new AvroKeyValue<CharSequence, TextStats>(avroFileReader.next()); assertNotNull(secondRecord.get()); assertEquals("banana", secondRecord.getKey().toString()); assertEquals("banana", secondRecord.getValue().name.toString()); // That's all, folks. assertFalse(avroFileReader.hasNext()); avroFileReader.close(); } public static class R1 { String attribute; } @Test public void testUsingReflection() throws Exception { Job job = new Job(); Schema schema = ReflectData.get().getSchema(R1.class); AvroJob.setOutputValueSchema(job, schema); TaskAttemptContext context = createMock(TaskAttemptContext.class); replay(context); R1 record = new R1(); record.attribute = "test"; AvroValue<R1> avroValue = new AvroValue<R1>(record); ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); AvroDatumConverterFactory factory = new AvroDatumConverterFactory(job.getConfiguration()); AvroDatumConverter<Text, ?> keyConverter = factory.create(Text.class); @SuppressWarnings("unchecked") AvroDatumConverter<AvroValue<R1>, R1> valueConverter = factory.create((Class<AvroValue<R1>>) avroValue.getClass()); AvroKeyValueRecordWriter<Text, AvroValue<R1>> writer = new AvroKeyValueRecordWriter<Text, AvroValue<R1>>( keyConverter, valueConverter, new ReflectData(), CodecFactory.nullCodec(), outputStream); writer.write(new Text("reflectionData"), avroValue); writer.close(context); verify(context); ByteArrayInputStream inputStream = new ByteArrayInputStream(outputStream.toByteArray()); Schema readerSchema = AvroKeyValue.getSchema( Schema.create(Schema.Type.STRING), schema); DatumReader<GenericRecord> datumReader = new ReflectDatumReader<GenericRecord>(readerSchema); DataFileStream<GenericRecord> avroFileReader = new DataFileStream<GenericRecord>(inputStream, datumReader); // Verify that the first record was written. assertTrue(avroFileReader.hasNext()); // Verify that the record holds the same data that we've written AvroKeyValue<CharSequence, R1> firstRecord = new AvroKeyValue<CharSequence, R1>(avroFileReader.next()); assertNotNull(firstRecord.get()); assertEquals("reflectionData", firstRecord.getKey().toString()); assertEquals(record.attribute, firstRecord.getValue().attribute); } @Test public void testSyncableWriteRecords() throws IOException { Job job = new Job(); AvroJob.setOutputValueSchema(job, TextStats.SCHEMA$); TaskAttemptContext context = createMock(TaskAttemptContext.class); replay(context); AvroDatumConverterFactory factory = new AvroDatumConverterFactory(job.getConfiguration()); AvroDatumConverter<Text, ?> keyConverter = factory.create(Text.class); AvroValue<TextStats> avroValue = new AvroValue<TextStats>(null); @SuppressWarnings("unchecked") AvroDatumConverter<AvroValue<TextStats>, ?> valueConverter = factory.create((Class<AvroValue<TextStats>>) avroValue.getClass()); CodecFactory compressionCodec = CodecFactory.nullCodec(); FileOutputStream outputStream = new FileOutputStream(new File("target/temp.avro")); // Write a marker followed by each record: <'apple', TextStats('apple')> and <'banana', TextStats('banana')>. AvroKeyValueRecordWriter<Text, AvroValue<TextStats>> writer = new AvroKeyValueRecordWriter<Text, AvroValue<TextStats>>(keyConverter, valueConverter, new ReflectData(), compressionCodec, outputStream); TextStats appleStats = new TextStats(); appleStats.name = "apple"; long pointOne = writer.sync(); writer.write(new Text("apple"), new AvroValue<TextStats>(appleStats)); TextStats bananaStats = new TextStats(); bananaStats.name = "banana"; long pointTwo = writer.sync(); writer.write(new Text("banana"), new AvroValue<TextStats>(bananaStats)); writer.close(context); verify(context); Configuration conf = new Configuration(); conf.set("fs.default.name", "file:///"); Path avroFile = new Path("target/temp.avro"); DataFileReader<GenericData.Record> avroFileReader = new DataFileReader<GenericData.Record>(new FsInput(avroFile, conf), new SpecificDatumReader<GenericData.Record>()); avroFileReader.seek(pointTwo); // Verify that the second record was written; assertTrue(avroFileReader.hasNext()); AvroKeyValue<CharSequence, TextStats> secondRecord = new AvroKeyValue<CharSequence, TextStats>(avroFileReader.next()); assertNotNull(secondRecord.get()); assertEquals("banana", secondRecord.getKey().toString()); assertEquals("banana", secondRecord.getValue().name.toString()); avroFileReader.seek(pointOne); // Verify that the first record was written. assertTrue(avroFileReader.hasNext()); AvroKeyValue<CharSequence, TextStats> firstRecord = new AvroKeyValue<CharSequence, TextStats>(avroFileReader.next()); assertNotNull(firstRecord.get()); assertEquals("apple", firstRecord.getKey().toString()); assertEquals("apple", firstRecord.getValue().name.toString()); // That's all, folks. avroFileReader.close(); } }