/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.runtime;

import java.util.List;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.testng.Assert;
import org.testng.annotations.Test;

import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.gson.Gson;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;

import gobblin.configuration.WorkUnitState;
import gobblin.converter.Converter;
import gobblin.converter.DataConversionException;
import gobblin.converter.EmptyIterable;
import gobblin.converter.SchemaConversionException;
import gobblin.converter.SingleRecordIterable;
import gobblin.test.TestConverter;


/**
 * Unit tests for {@link MultiConverter}.
 *
 * <p>
 *   Each test builds a {@link MultiConverter} from a chain of {@link Converter}s, feeds it
 *   {@link #TEST_SCHEMA} and {@link #TEST_RECORD}, and checks the number and content of the records
 *   that come out of the chain.
 * </p>
 *
 * @author Yinan Li
 */
@Test(groups = { "gobblin.runtime" })
public class MultiConverterTest {

  private static final String TEST_SCHEMA =
      "{\"namespace\": \"example.avro\",\n" +
      " \"type\": \"record\",\n" +
      " \"name\": \"User\",\n" +
      " \"fields\": [\n" +
      " {\"name\": \"name\", \"type\": \"string\"},\n" +
      " {\"name\": \"favorite_number\", \"type\": \"int\"},\n" +
      " {\"name\": \"favorite_color\", \"type\": \"string\"}\n" +
      " ]\n" +
      "}";

  private static final String TEST_RECORD =
      "{\"fields\": {" +
      "\"name\": \"Alyssa\", " +
      "\"favorite_number\": 256, " +
      "\"favorite_color\": \"yellow\"" +
      "}" +
      "}";

  /**
   * A chain of one-to-one converters must yield exactly one converted record.
   */
  @Test
  public void testConversion() throws Exception {
    assertConversion(1, new SchemaSimplificationConverter(), new IdentityConverter(), new TestConverter());
  }

  /**
   * Two chained {@link MultiIdentityConverter}s of multiplicity 2 must yield 2 * 2 = 4 records.
   */
  @Test
  public void testConversionWithMultiplicity() throws Exception {
    assertConversion(4, new SchemaSimplificationConverter(), new MultiIdentityConverter(2),
        new MultiIdentityConverter(2), new TestConverter());
  }

  /**
   * Combines {@link MultiIdentityConverter} with {@link AlternatingConverter}.
   */
  @Test
  public void testConversionWithMultiplicityAndAlternating() throws Exception {
    // 6 inputs to the alternating converter produce 0 + 1 + 4 + 0 + 1 + 4 = 10 records
    assertConversion(10, new SchemaSimplificationConverter(), new MultiIdentityConverter(6),
        new AlternatingConverter(4), new TestConverter());

    // Each of those 10 records is then multiplied by 4
    assertConversion(40, new SchemaSimplificationConverter(), new MultiIdentityConverter(6),
        new AlternatingConverter(4), new MultiIdentityConverter(4), new TestConverter());
  }

  /**
   * Combines {@link MultiIdentityConverter} with {@link OneOrEmptyConverter}.
   */
  @Test
  public void testConversionWithMultiplicityAndOneOrEmpty() throws Exception {
    // A frequency of 1 lets every one of the 20 records through
    assertConversion(20, new SchemaSimplificationConverter(), new MultiIdentityConverter(20),
        new OneOrEmptyConverter(1), new TestConverter());

    // A frequency of 10 lets 2 of the 20 records through
    assertConversion(2, new SchemaSimplificationConverter(), new MultiIdentityConverter(20),
        new OneOrEmptyConverter(10), new TestConverter());

    // The 2 surviving records are then multiplied by 10
    assertConversion(20, new SchemaSimplificationConverter(), new MultiIdentityConverter(20),
        new OneOrEmptyConverter(10), new MultiIdentityConverter(10), new TestConverter());
  }

  /**
   * An {@link EmptyConverter} anywhere in the chain must result in no converted records.
   */
  @Test
  public void testConversionWithEmptyConverter() throws Exception {
    // The same WorkUnitState instance is deliberately shared by all the chains below
    WorkUnitState workUnitState = new WorkUnitState();

    assertNoRecords(workUnitState, new EmptyConverter(), new SchemaSimplificationConverter(), new TestConverter());

    assertNoRecords(workUnitState, new SchemaSimplificationConverter(), new EmptyConverter(), new TestConverter());

    assertNoRecords(workUnitState, new SchemaSimplificationConverter(), new TestConverter(), new EmptyConverter());

    assertNoRecords(workUnitState, new SchemaSimplificationConverter(), new MultiIdentityConverter(5),
        new TestConverter(), new EmptyConverter());

    assertNoRecords(workUnitState, new SchemaSimplificationConverter(), new EmptyConverter(),
        new MultiIdentityConverter(5), new TestConverter());
  }

  /**
   * A {@link MultiConverter} without any converters must hand back the schema and the record unchanged.
   */
  @Test
  public void testConversionWithoutConverters() throws Exception {
    MultiConverter multiConverter = new MultiConverter(
        Lists.<Converter<? extends Object, ? extends Object, ? extends Object, ? extends Object>> newArrayList());
    WorkUnitState workUnitState = new WorkUnitState();

    // TestNG's assertEquals takes (actual, expected)
    Assert.assertEquals(multiConverter.convertSchema(TEST_SCHEMA, workUnitState), TEST_SCHEMA);
    Assert.assertEquals(
        multiConverter.convertRecord(TEST_SCHEMA, TEST_RECORD, workUnitState).iterator().next(), TEST_RECORD);
  }

  /**
   * Builds a {@link MultiConverter} that applies the given converters in the given order.
   *
   * @param converters the converter chain
   * @return a new {@link MultiConverter} wrapping the chain
   */
  private static MultiConverter newMultiConverter(Converter<?, ?, ?, ?>... converters) {
    return new MultiConverter(Lists.<Converter<?, ?, ?, ?>> newArrayList(converters));
  }

  /**
   * Runs {@link #TEST_SCHEMA} and {@link #TEST_RECORD} through the given converter chain using a fresh
   * {@link WorkUnitState}, and asserts both the number of converted records and the content of each one.
   *
   * <p>
   *   The returned iterable is traversed twice: once to count the records and once to verify each of them.
   * </p>
   *
   * @param expectedRecordCount number of records the chain is expected to produce
   * @param converters the converter chain, in application order
   * @throws Exception if schema or record conversion fails
   */
  private void assertConversion(int expectedRecordCount, Converter<?, ?, ?, ?>... converters) throws Exception {
    MultiConverter multiConverter = newMultiConverter(converters);
    WorkUnitState workUnitState = new WorkUnitState();

    Schema schema = (Schema) multiConverter.convertSchema(TEST_SCHEMA, workUnitState);
    Iterable<Object> convertedRecordIterable = multiConverter.convertRecord(schema, TEST_RECORD, workUnitState);

    Assert.assertEquals(Iterables.size(convertedRecordIterable), expectedRecordCount);
    for (Object record : convertedRecordIterable) {
      checkConvertedAvroData(schema, (GenericRecord) record);
    }
  }

  /**
   * Runs {@link #TEST_SCHEMA} and {@link #TEST_RECORD} through the given converter chain and asserts that
   * no converted record comes out.
   *
   * @param workUnitState the state passed to the schema and record conversion
   * @param converters the converter chain, in application order
   * @throws Exception if schema or record conversion fails
   */
  private void assertNoRecords(WorkUnitState workUnitState, Converter<?, ?, ?, ?>... converters) throws Exception {
    MultiConverter multiConverter = newMultiConverter(converters);
    Schema schema = (Schema) multiConverter.convertSchema(TEST_SCHEMA, workUnitState);
    Assert.assertFalse(multiConverter.convertRecord(schema, TEST_RECORD, workUnitState).iterator().hasNext());
  }

  /**
   * Verifies that the converted schema matches {@link #TEST_SCHEMA} and that the converted record carries
   * the values of {@link #TEST_RECORD}.
   *
   * @param schema the converted Avro schema
   * @param record a converted Avro record
   */
  private void checkConvertedAvroData(Schema schema, GenericRecord record) {
    Assert.assertEquals(schema.getNamespace(), "example.avro");
    Assert.assertEquals(schema.getType(), Schema.Type.RECORD);
    Assert.assertEquals(schema.getName(), "User");
    Assert.assertEquals(schema.getFields().size(), 3);

    assertField(schema, "name", Schema.Type.STRING);
    assertField(schema, "favorite_number", Schema.Type.INT);
    assertField(schema, "favorite_color", Schema.Type.STRING);

    Assert.assertEquals(record.get("name"), "Alyssa");
    // The number is expected as a double, not as an int
    Assert.assertEquals(record.get("favorite_number"), 256d);
    Assert.assertEquals(record.get("favorite_color"), "yellow");
  }

  /**
   * Asserts that the schema has a field with the given name and the given type.
   */
  private static void assertField(Schema schema, String fieldName, Schema.Type expectedType) {
    Schema.Field field = schema.getField(fieldName);
    Assert.assertEquals(field.name(), fieldName);
    Assert.assertEquals(field.schema().getType(), expectedType);
  }

  /**
   * Returns a new list holding {@code count} references to the given record; the list is empty when
   * {@code count} is not positive.
   */
  private static List<Object> repeatRecord(Object record, int count) {
    List<Object> records = Lists.newArrayList();
    for (int i = 0; i < count; i++) {
      records.add(record);
    }
    return records;
  }

  /**
   * A {@link Converter} that simplifies the input data records: it extracts the JSON object stored under
   * the "fields" key of the input record and returns it in string form. The schema is passed through as is.
   */
  private static class SchemaSimplificationConverter extends Converter<String, String, String, String> {

    private static final Gson GSON = new Gson();

    @Override
    public String convertSchema(String inputSchema, WorkUnitState workUnit)
        throws SchemaConversionException {
      return inputSchema;
    }

    @Override
    public Iterable<String> convertRecord(String outputSchema, String inputRecord, WorkUnitState workUnit)
        throws DataConversionException {
      JsonElement element = GSON.fromJson(inputRecord, JsonObject.class).get("fields");
      return new SingleRecordIterable<String>(element.getAsJsonObject().toString());
    }
  }

  /**
   * A {@link Converter} that returns the input schema and data records as they are.
   */
  private static class IdentityConverter extends Converter<Object, Object, Object, Object> {

    @Override
    public Object convertSchema(Object inputSchema, WorkUnitState workUnit)
        throws SchemaConversionException {
      return inputSchema;
    }

    @Override
    public Iterable<Object> convertRecord(Object outputSchema, Object inputRecord, WorkUnitState workUnit)
        throws DataConversionException {
      return new SingleRecordIterable<Object>(inputRecord);
    }
  }

  /**
   * A {@link Converter} that returns the input schema and data records as they are but with a given multiplicity.
   */
  private static class MultiIdentityConverter extends Converter<Object, Object, Object, Object> {

    private final int multiplicity;

    /**
     * @param multiplicity number of times each input record is emitted
     */
    public MultiIdentityConverter(int multiplicity) {
      this.multiplicity = multiplicity;
    }

    @Override
    public Object convertSchema(Object inputSchema, WorkUnitState workUnit)
        throws SchemaConversionException {
      return inputSchema;
    }

    @Override
    public Iterable<Object> convertRecord(Object outputSchema, Object inputRecord, WorkUnitState workUnit)
        throws DataConversionException {
      return repeatRecord(inputRecord, this.multiplicity);
    }
  }

  /**
   * A {@link Converter} that returns no converted data record.
   */
  private static class EmptyConverter extends Converter<Object, Object, Object, Object> {

    @Override
    public Object convertSchema(Object inputSchema, WorkUnitState workUnit)
        throws SchemaConversionException {
      return inputSchema;
    }

    @Override
    public Iterable<Object> convertRecord(Object outputSchema, Object inputRecord, WorkUnitState workUnit)
        throws DataConversionException {
      return new EmptyIterable<Object>();
    }
  }

  /**
   * A {@link Converter} which cycles, call after call, between returning an {@link EmptyIterable},
   * a {@link SingleRecordIterable}, and a {@link List}. The number of records in the {@link List} is
   * controlled by the multiplicity given to the constructor, similar to {@link MultiIdentityConverter}.
   */
  private static class AlternatingConverter extends Converter<Object, Object, Object, Object> {

    // Position in the empty / single / list cycle; takes the values 1, 2, 3 once convertRecord has run
    private int executionCount = 0;
    private final int multiplicity;

    /**
     * @param multiplicity number of copies of the input record returned on every third call
     */
    public AlternatingConverter(int multiplicity) {
      this.multiplicity = multiplicity;
    }

    @Override
    public Object convertSchema(Object inputSchema, WorkUnitState workUnit)
        throws SchemaConversionException {
      return inputSchema;
    }

    @Override
    public Iterable<Object> convertRecord(Object outputSchema, Object inputRecord, WorkUnitState workUnit)
        throws DataConversionException {
      this.executionCount++;
      if (this.executionCount > 3) {
        // Wrap around to the start of the cycle
        this.executionCount = 1;
      }

      switch (this.executionCount) {
        case 1:
          return new EmptyIterable<Object>();
        case 2:
          return new SingleRecordIterable<Object>(inputRecord);
        case 3:
          return repeatRecord(inputRecord, this.multiplicity);
        default:
          throw new DataConversionException("Execution count must always be 1, 2, or 3");
      }
    }
  }

  /**
   * A {@link Converter} which returns a {@link SingleRecordIterable} on its first call to convertRecord and
   * on every "x"-th call after that. Every other time it returns an {@link EmptyIterable}.
   */
  private static class OneOrEmptyConverter extends Converter<Object, Object, Object, Object> {

    // Number of convertRecord calls made so far
    private int executionCount = 0;
    private final int recordNum;

    /**
     * @param recordNum is the frequency at which a {@link SingleRecordIterable} will be returned. This iterable
     *                  will be a simple wrapper of the input record. Must be positive.
     * @throws IllegalArgumentException if {@code recordNum} is not positive
     */
    public OneOrEmptyConverter(int recordNum) {
      // Fail fast: a zero frequency would otherwise surface as an ArithmeticException in convertRecord
      if (recordNum < 1) {
        throw new IllegalArgumentException("recordNum must be positive, but was " + recordNum);
      }
      this.recordNum = recordNum;
    }

    @Override
    public Object convertSchema(Object inputSchema, WorkUnitState workUnit)
        throws SchemaConversionException {
      return inputSchema;
    }

    @Override
    public Iterable<Object> convertRecord(Object outputSchema, Object inputRecord, WorkUnitState workUnit)
        throws DataConversionException {
      boolean emitRecord = this.executionCount % this.recordNum == 0;
      this.executionCount++;

      if (emitRecord) {
        return new SingleRecordIterable<Object>(inputRecord);
      }
      return new EmptyIterable<Object>();
    }
  }
}