/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.beam.sdk.io; import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThat; import com.google.common.collect.ImmutableList; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import javax.annotation.Nullable; import org.apache.avro.Schema; import org.apache.avro.file.DataFileReader; import org.apache.avro.file.DataFileWriter; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericRecord; import org.apache.avro.io.DatumReader; import org.apache.avro.io.DatumWriter; import org.apache.avro.specific.SpecificDatumReader; import org.apache.avro.specific.SpecificDatumWriter; import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.testing.NeedsRunner; import org.apache.beam.sdk.testing.PAssert; import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.transforms.Create; import org.apache.beam.sdk.values.PCollection; import org.junit.Rule; import org.junit.Test; import org.junit.experimental.categories.Category; import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import org.junit.runners.Suite; /** * A test suite for {@link AvroIO.Write} and {@link AvroIO.Read} transforms. */ @RunWith(Suite.class) @Suite.SuiteClasses({ AvroIOTransformTest.AvroIOReadTransformTest.class, AvroIOTransformTest.AvroIOWriteTransformTest.class }) public class AvroIOTransformTest { // TODO: Stop requiring local files @Rule public final transient TestPipeline pipeline = TestPipeline.create(); @Rule public final TemporaryFolder tmpFolder = new TemporaryFolder(); private static final Schema.Parser parser = new Schema.Parser(); private static final String SCHEMA_STRING = "{\"namespace\": \"example.avro\",\n" + " \"type\": \"record\",\n" + " \"name\": \"AvroGeneratedUser\",\n" + " \"fields\": [\n" + " {\"name\": \"name\", \"type\": \"string\"},\n" + " {\"name\": \"favorite_number\", \"type\": [\"int\", \"null\"]},\n" + " {\"name\": \"favorite_color\", \"type\": [\"string\", \"null\"]}\n" + " ]\n" + "}"; private static final Schema SCHEMA = parser.parse(SCHEMA_STRING); private static AvroGeneratedUser[] generateAvroObjects() { final AvroGeneratedUser user1 = new AvroGeneratedUser(); user1.setName("Bob"); user1.setFavoriteNumber(256); final AvroGeneratedUser user2 = new AvroGeneratedUser(); user2.setName("Alice"); user2.setFavoriteNumber(128); final AvroGeneratedUser user3 = new AvroGeneratedUser(); user3.setName("Ted"); user3.setFavoriteColor("white"); return new AvroGeneratedUser[] { user1, user2, user3 }; } /** * Tests for AvroIO Read transforms, using classes generated from {@code user.avsc}. */ @RunWith(Parameterized.class) public static class AvroIOReadTransformTest extends AvroIOTransformTest { private static GenericRecord[] generateAvroGenericRecords() { final GenericRecord user1 = new GenericData.Record(SCHEMA); user1.put("name", "Bob"); user1.put("favorite_number", 256); final GenericRecord user2 = new GenericData.Record(SCHEMA); user2.put("name", "Alice"); user2.put("favorite_number", 128); final GenericRecord user3 = new GenericData.Record(SCHEMA); user3.put("name", "Ted"); user3.put("favorite_color", "white"); return new GenericRecord[] { user1, user2, user3 }; } private void generateAvroFile(final AvroGeneratedUser[] elements, final File avroFile) throws IOException { final DatumWriter<AvroGeneratedUser> userDatumWriter = new SpecificDatumWriter<>(AvroGeneratedUser.class); try (DataFileWriter<AvroGeneratedUser> dataFileWriter = new DataFileWriter<>(userDatumWriter)) { dataFileWriter.create(elements[0].getSchema(), avroFile); for (final AvroGeneratedUser user : elements) { dataFileWriter.append(user); } } } private <T> void runTestRead(@Nullable final String applyName, final AvroIO.Read<T> readBuilder, final String expectedName, final T[] expectedOutput) throws Exception { final File avroFile = tmpFolder.newFile("file.avro"); generateAvroFile(generateAvroObjects(), avroFile); final AvroIO.Read<T> read = readBuilder.from(avroFile.getPath()); final PCollection<T> output = applyName == null ? pipeline.apply(read) : pipeline.apply(applyName, read); PAssert.that(output).containsInAnyOrder(expectedOutput); pipeline.run(); assertEquals(expectedName, output.getName()); } @Parameterized.Parameters(name = "{2}_with_{4}") public static Iterable<Object[]> data() throws IOException { final String generatedClass = "GeneratedClass"; final String fromSchema = "SchemaObject"; final String fromSchemaString = "SchemaString"; return ImmutableList.<Object[]>builder() .add( // test read using generated class new Object[] { null, AvroIO.read(AvroGeneratedUser.class), "AvroIO.Read/Read.out", generateAvroObjects(), generatedClass }, new Object[] { "MyRead", AvroIO.read(AvroGeneratedUser.class), "MyRead/Read.out", generateAvroObjects(), generatedClass }, // test read using schema object new Object[] { null, AvroIO.readGenericRecords(SCHEMA), "AvroIO.Read/Read.out", generateAvroGenericRecords(), fromSchema }, new Object[] { "MyRead", AvroIO.readGenericRecords(SCHEMA), "MyRead/Read.out", generateAvroGenericRecords(), fromSchema }, // test read using schema string new Object[] { null, AvroIO.readGenericRecords(SCHEMA_STRING), "AvroIO.Read/Read.out", generateAvroGenericRecords(), fromSchemaString }, new Object[] { "MyRead", AvroIO.readGenericRecords(SCHEMA_STRING), "MyRead/Read.out", generateAvroGenericRecords(), fromSchemaString }) .build(); } @SuppressWarnings("DefaultAnnotationParam") @Parameterized.Parameter(0) public String transformName; @Parameterized.Parameter(1) public AvroIO.Read readTransform; @Parameterized.Parameter(2) public String expectedReadTransformName; @Parameterized.Parameter(3) public Object[] expectedOutput; @Parameterized.Parameter(4) public String testAlias; @Test @Category(NeedsRunner.class) public void testRead() throws Exception { runTestRead(transformName, readTransform, expectedReadTransformName, expectedOutput); } } /** * Tests for AvroIO Write transforms, using classes generated from {@code user.avsc}. */ @RunWith(Parameterized.class) public static class AvroIOWriteTransformTest extends AvroIOTransformTest { private static final String WRITE_TRANSFORM_NAME = "AvroIO.Write"; private List<AvroGeneratedUser> readAvroFile(final File avroFile) throws IOException { final DatumReader<AvroGeneratedUser> userDatumReader = new SpecificDatumReader<>(AvroGeneratedUser.class); final List<AvroGeneratedUser> users = new ArrayList<>(); try (DataFileReader<AvroGeneratedUser> dataFileReader = new DataFileReader<>(avroFile, userDatumReader)) { while (dataFileReader.hasNext()) { users.add(dataFileReader.next()); } } return users; } @Parameterized.Parameters(name = "{0}_with_{1}") public static Iterable<Object[]> data() throws IOException { final String generatedClass = "GeneratedClass"; final String fromSchema = "SchemaObject"; final String fromSchemaString = "SchemaString"; return ImmutableList.<Object[]>builder() .add( new Object[] { AvroIO.write(AvroGeneratedUser.class), generatedClass }, new Object[] { AvroIO.writeGenericRecords(SCHEMA), fromSchema }, new Object[] { AvroIO.writeGenericRecords(SCHEMA_STRING), fromSchemaString }) .build(); } @SuppressWarnings("DefaultAnnotationParam") @Parameterized.Parameter(0) public AvroIO.Write writeTransform; @Parameterized.Parameter(1) public String testAlias; private <T> void runTestWrite(final AvroIO.Write<T> writeBuilder) throws Exception { final File avroFile = tmpFolder.newFile("file.avro"); final AvroGeneratedUser[] users = generateAvroObjects(); final AvroIO.Write<T> write = writeBuilder.to(avroFile.getPath()); @SuppressWarnings("unchecked") final PCollection<T> input = pipeline.apply(Create.of(Arrays.asList((T[]) users)) .withCoder((Coder<T>) AvroCoder.of(AvroGeneratedUser.class))); input.apply(write.withoutSharding()); pipeline.run(); assertEquals(WRITE_TRANSFORM_NAME, write.getName()); assertThat(readAvroFile(avroFile), containsInAnyOrder(users)); } @Test @Category(NeedsRunner.class) public void testWrite() throws Exception { runTestWrite(writeTransform); } // TODO: for Write only, test withSuffix, withNumShards, // withShardNameTemplate and withoutSharding. } }