/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.beam.sdk.io.gcp.bigquery; import static org.hamcrest.Matchers.equalTo; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThat; import com.google.api.services.bigquery.model.TableFieldSchema; import com.google.api.services.bigquery.model.TableRow; import com.google.api.services.bigquery.model.TableSchema; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import com.google.common.io.BaseEncoding; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; import org.apache.avro.Schema; import org.apache.avro.Schema.Field; import org.apache.avro.Schema.Type; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericRecord; import org.apache.avro.reflect.Nullable; import org.apache.avro.util.Utf8; import org.apache.beam.sdk.coders.AvroCoder; import org.apache.beam.sdk.coders.DefaultCoder; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; /** * Tests for {@link BigQueryAvroUtils}. */ @RunWith(JUnit4.class) public class BigQueryAvroUtilsTest { private List<TableFieldSchema> subFields = Lists.<TableFieldSchema>newArrayList( new TableFieldSchema().setName("species").setType("STRING").setMode("NULLABLE")); /* * Note that the quality and quantity fields do not have their mode set, so they should default * to NULLABLE. This is an important test of BigQuery semantics. * * All the other fields we set in this function are required on the Schema response. * * See https://cloud.google.com/bigquery/docs/reference/v2/tables#schema */ private List<TableFieldSchema> fields = Lists.newArrayList( new TableFieldSchema().setName("number").setType("INTEGER").setMode("REQUIRED"), new TableFieldSchema().setName("species").setType("STRING").setMode("NULLABLE"), new TableFieldSchema().setName("quality").setType("FLOAT") /* default to NULLABLE */, new TableFieldSchema().setName("quantity").setType("INTEGER") /* default to NULLABLE */, new TableFieldSchema().setName("birthday").setType("TIMESTAMP").setMode("NULLABLE"), new TableFieldSchema().setName("flighted").setType("BOOLEAN").setMode("NULLABLE"), new TableFieldSchema().setName("sound").setType("BYTES").setMode("NULLABLE"), new TableFieldSchema().setName("anniversaryDate").setType("DATE").setMode("NULLABLE"), new TableFieldSchema().setName("anniversaryDatetime") .setType("DATETIME").setMode("NULLABLE"), new TableFieldSchema().setName("anniversaryTime").setType("TIME").setMode("NULLABLE"), new TableFieldSchema().setName("scion").setType("RECORD").setMode("NULLABLE") .setFields(subFields), new TableFieldSchema().setName("associates").setType("RECORD").setMode("REPEATED") .setFields(subFields)); @Test public void testConvertGenericRecordToTableRow() throws Exception { TableSchema tableSchema = new TableSchema(); tableSchema.setFields(fields); Schema avroSchema = AvroCoder.of(Bird.class).getSchema(); { // Test nullable fields. GenericRecord record = new GenericData.Record(avroSchema); record.put("number", 5L); TableRow convertedRow = BigQueryAvroUtils.convertGenericRecordToTableRow(record, tableSchema); TableRow row = new TableRow() .set("number", "5") .set("associates", new ArrayList<TableRow>()); assertEquals(row, convertedRow); } { // Test type conversion for: // INTEGER, FLOAT, TIMESTAMP, BOOLEAN, BYTES, DATE, DATETIME, TIME. GenericRecord record = new GenericData.Record(avroSchema); byte[] soundBytes = "chirp,chirp".getBytes(); ByteBuffer soundByteBuffer = ByteBuffer.wrap(soundBytes); soundByteBuffer.rewind(); record.put("number", 5L); record.put("quality", 5.0); record.put("birthday", 5L); record.put("flighted", Boolean.TRUE); record.put("sound", soundByteBuffer); record.put("anniversaryDate", new Utf8("2000-01-01")); record.put("anniversaryDatetime", new String("2000-01-01 00:00:00.000005")); record.put("anniversaryTime", new Utf8("00:00:00.000005")); TableRow convertedRow = BigQueryAvroUtils.convertGenericRecordToTableRow(record, tableSchema); TableRow row = new TableRow() .set("number", "5") .set("birthday", "1970-01-01 00:00:00.000005 UTC") .set("quality", 5.0) .set("associates", new ArrayList<TableRow>()) .set("flighted", Boolean.TRUE) .set("sound", BaseEncoding.base64().encode(soundBytes)) .set("anniversaryDate", "2000-01-01") .set("anniversaryDatetime", "2000-01-01 00:00:00.000005") .set("anniversaryTime", "00:00:00.000005"); assertEquals(row, convertedRow); } { // Test repeated fields. Schema subBirdSchema = AvroCoder.of(Bird.SubBird.class).getSchema(); GenericRecord nestedRecord = new GenericData.Record(subBirdSchema); nestedRecord.put("species", "other"); GenericRecord record = new GenericData.Record(avroSchema); record.put("number", 5L); record.put("associates", Lists.<GenericRecord>newArrayList(nestedRecord)); TableRow convertedRow = BigQueryAvroUtils.convertGenericRecordToTableRow(record, tableSchema); TableRow row = new TableRow() .set("associates", Lists.<TableRow>newArrayList( new TableRow().set("species", "other"))) .set("number", "5"); assertEquals(row, convertedRow); } } @Test public void testConvertBigQuerySchemaToAvroSchema() { TableSchema tableSchema = new TableSchema(); tableSchema.setFields(fields); Schema avroSchema = BigQueryAvroUtils.toGenericAvroSchema("testSchema", tableSchema.getFields()); assertThat(avroSchema.getField("number").schema(), equalTo(Schema.create(Type.LONG))); assertThat( avroSchema.getField("species").schema(), equalTo(Schema.createUnion(Schema.create(Type.NULL), Schema.create(Type.STRING)))); assertThat( avroSchema.getField("quality").schema(), equalTo(Schema.createUnion(Schema.create(Type.NULL), Schema.create(Type.DOUBLE)))); assertThat( avroSchema.getField("quantity").schema(), equalTo(Schema.createUnion(Schema.create(Type.NULL), Schema.create(Type.LONG)))); assertThat( avroSchema.getField("birthday").schema(), equalTo(Schema.createUnion(Schema.create(Type.NULL), Schema.create(Type.LONG)))); assertThat( avroSchema.getField("flighted").schema(), equalTo(Schema.createUnion(Schema.create(Type.NULL), Schema.create(Type.BOOLEAN)))); assertThat( avroSchema.getField("sound").schema(), equalTo(Schema.createUnion(Schema.create(Type.NULL), Schema.create(Type.BYTES)))); assertThat( avroSchema.getField("anniversaryDate").schema(), equalTo(Schema.createUnion(Schema.create(Type.NULL), Schema.create(Type.STRING)))); assertThat( avroSchema.getField("anniversaryDatetime").schema(), equalTo(Schema.createUnion(Schema.create(Type.NULL), Schema.create(Type.STRING)))); assertThat( avroSchema.getField("anniversaryTime").schema(), equalTo(Schema.createUnion(Schema.create(Type.NULL), Schema.create(Type.STRING)))); assertThat( avroSchema.getField("scion").schema(), equalTo( Schema.createUnion( Schema.create(Type.NULL), Schema.createRecord( "scion", "org.apache.beam.sdk.io.gcp.bigquery", "Translated Avro Schema for scion", false, ImmutableList.of( new Field( "species", Schema.createUnion( Schema.create(Type.NULL), Schema.create(Type.STRING)), null, (Object) null)))))); assertThat( avroSchema.getField("associates").schema(), equalTo( Schema.createArray( Schema.createRecord( "associates", "org.apache.beam.sdk.io.gcp.bigquery", "Translated Avro Schema for associates", false, ImmutableList.of( new Field( "species", Schema.createUnion( Schema.create(Type.NULL), Schema.create(Type.STRING)), null, (Object) null)))))); } /** * Pojo class used as the record type in tests. */ @DefaultCoder(AvroCoder.class) @SuppressWarnings("unused") // Used by Avro reflection. static class Bird { long number; @Nullable String species; @Nullable Double quality; @Nullable Long quantity; @Nullable Long birthday; // Exercises TIMESTAMP. @Nullable Boolean flighted; @Nullable ByteBuffer sound; @Nullable Utf8 anniversaryDate; @Nullable String anniversaryDatetime; @Nullable Utf8 anniversaryTime; @Nullable SubBird scion; SubBird[] associates; static class SubBird { @Nullable String species; public SubBird() {} } public Bird() { associates = new SubBird[1]; associates[0] = new SubBird(); } } }