/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.beam.sdk.io.gcp.bigquery;
import static org.hamcrest.Matchers.containsString;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyString;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.verifyNoMoreInteractions;
import static org.mockito.Mockito.when;
import com.google.api.services.bigquery.Bigquery;
import com.google.api.services.bigquery.model.Dataset;
import com.google.api.services.bigquery.model.Job;
import com.google.api.services.bigquery.model.JobConfiguration;
import com.google.api.services.bigquery.model.JobConfigurationQuery;
import com.google.api.services.bigquery.model.JobReference;
import com.google.api.services.bigquery.model.JobStatistics;
import com.google.api.services.bigquery.model.JobStatistics2;
import com.google.api.services.bigquery.model.JobStatus;
import com.google.api.services.bigquery.model.Table;
import com.google.api.services.bigquery.model.TableCell;
import com.google.api.services.bigquery.model.TableDataList;
import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableReference;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import com.google.common.collect.ImmutableList;
import com.google.common.io.BaseEncoding;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.mockito.Mock;
import org.mockito.MockitoAnnotations;
/**
* Tests for {@link BigQueryTableRowIterator}.
*/
@RunWith(JUnit4.class)
public class BigQueryTableRowIteratorTest {
@Rule public ExpectedException thrown = ExpectedException.none();
@Mock private Bigquery mockClient;
@Mock private Bigquery.Datasets mockDatasets;
@Mock private Bigquery.Datasets.Delete mockDatasetsDelete;
@Mock private Bigquery.Datasets.Insert mockDatasetsInsert;
@Mock private Bigquery.Jobs mockJobs;
@Mock private Bigquery.Jobs.Get mockJobsGet;
@Mock private Bigquery.Jobs.Insert mockJobsInsert;
@Mock private Bigquery.Tables mockTables;
@Mock private Bigquery.Tables.Get mockTablesGet;
@Mock private Bigquery.Tables.Delete mockTablesDelete;
@Mock private Bigquery.Tabledata mockTabledata;
@Mock private Bigquery.Tabledata.List mockTabledataList;
@Before
public void setUp() throws IOException {
MockitoAnnotations.initMocks(this);
when(mockClient.tabledata()).thenReturn(mockTabledata);
when(mockTabledata.list(anyString(), anyString(), anyString())).thenReturn(mockTabledataList);
when(mockClient.tables()).thenReturn(mockTables);
when(mockTables.delete(anyString(), anyString(), anyString())).thenReturn(mockTablesDelete);
when(mockTables.get(anyString(), anyString(), anyString())).thenReturn(mockTablesGet);
when(mockClient.datasets()).thenReturn(mockDatasets);
when(mockDatasets.delete(anyString(), anyString())).thenReturn(mockDatasetsDelete);
when(mockDatasets.insert(anyString(), any(Dataset.class))).thenReturn(mockDatasetsInsert);
when(mockClient.jobs()).thenReturn(mockJobs);
when(mockJobs.insert(anyString(), any(Job.class))).thenReturn(mockJobsInsert);
when(mockJobs.get(anyString(), anyString())).thenReturn(mockJobsGet);
}
@After
public void tearDown() {
verifyNoMoreInteractions(mockClient);
verifyNoMoreInteractions(mockDatasets);
verifyNoMoreInteractions(mockDatasetsDelete);
verifyNoMoreInteractions(mockDatasetsInsert);
verifyNoMoreInteractions(mockJobs);
verifyNoMoreInteractions(mockJobsGet);
verifyNoMoreInteractions(mockJobsInsert);
verifyNoMoreInteractions(mockTables);
verifyNoMoreInteractions(mockTablesDelete);
verifyNoMoreInteractions(mockTablesGet);
verifyNoMoreInteractions(mockTabledata);
verifyNoMoreInteractions(mockTabledataList);
}
private static Table tableWithBasicSchema() {
return new Table()
.setSchema(
new TableSchema()
.setFields(
Arrays.asList(
new TableFieldSchema().setName("name").setType("STRING"),
new TableFieldSchema().setName("answer").setType("INTEGER"),
new TableFieldSchema().setName("photo").setType("BYTES"),
new TableFieldSchema().setName("anniversary_date").setType("DATE"),
new TableFieldSchema().setName("anniversary_datetime").setType("DATETIME"),
new TableFieldSchema().setName("anniversary_time").setType("TIME"))));
}
private static Table noTableQuerySchema() {
return new Table()
.setSchema(
new TableSchema()
.setFields(
Arrays.asList(
new TableFieldSchema().setName("name").setType("STRING"),
new TableFieldSchema().setName("count").setType("INTEGER"),
new TableFieldSchema().setName("photo").setType("BYTES"))));
}
private static Table tableWithLocation() {
return new Table()
.setLocation("EU");
}
private TableRow rawRow(Object... args) {
List<TableCell> cells = new LinkedList<>();
for (Object a : args) {
cells.add(new TableCell().setV(a));
}
return new TableRow().setF(cells);
}
private TableDataList rawDataList(TableRow... rows) {
return new TableDataList().setRows(Arrays.asList(rows));
}
/**
* Verifies that when the query runs, the correct data is returned and the temporary dataset and
* table are both cleaned up.
*/
@Test
public void testReadFromQuery() throws IOException, InterruptedException {
// Mock job inserting.
Job dryRunJob = new Job().setStatistics(
new JobStatistics().setQuery(new JobStatistics2().setReferencedTables(
ImmutableList.of(new TableReference()))));
Job insertedJob = new Job().setJobReference(new JobReference());
when(mockJobsInsert.execute()).thenReturn(dryRunJob, insertedJob);
// Mock job polling.
JobStatus status = new JobStatus().setState("DONE");
JobConfigurationQuery resultQueryConfig = new JobConfigurationQuery()
.setDestinationTable(new TableReference()
.setProjectId("project")
.setDatasetId("tempdataset")
.setTableId("temptable"));
Job getJob =
new Job()
.setJobReference(new JobReference())
.setStatus(status)
.setConfiguration(new JobConfiguration().setQuery(resultQueryConfig));
when(mockJobsGet.execute()).thenReturn(getJob);
// Mock table schema fetch.
when(mockTablesGet.execute()).thenReturn(tableWithLocation(), tableWithBasicSchema());
byte[] photoBytes = "photograph".getBytes();
String photoBytesEncoded = BaseEncoding.base64().encode(photoBytes);
// Mock table data fetch.
when(mockTabledataList.execute()).thenReturn(
rawDataList(rawRow("Arthur", 42, photoBytesEncoded,
"2000-01-01", "2000-01-01 00:00:00.000005", "00:00:00.000005")));
// Run query and verify
String query = "SELECT name, count, photo, anniversary_date, "
+ "anniversary_datetime, anniversary_time from table";
JobConfigurationQuery queryConfig = new JobConfigurationQuery().setQuery(query);
try (BigQueryTableRowIterator iterator =
BigQueryTableRowIterator.fromQuery(queryConfig, "project", mockClient)) {
iterator.open();
assertTrue(iterator.advance());
TableRow row = iterator.getCurrent();
assertTrue(row.containsKey("name"));
assertTrue(row.containsKey("answer"));
assertTrue(row.containsKey("photo"));
assertTrue(row.containsKey("anniversary_date"));
assertTrue(row.containsKey("anniversary_datetime"));
assertTrue(row.containsKey("anniversary_time"));
assertEquals("Arthur", row.get("name"));
assertEquals(42, row.get("answer"));
assertEquals(photoBytesEncoded, row.get("photo"));
assertEquals("2000-01-01", row.get("anniversary_date"));
assertEquals("2000-01-01 00:00:00.000005", row.get("anniversary_datetime"));
assertEquals("00:00:00.000005", row.get("anniversary_time"));
assertFalse(iterator.advance());
}
// Temp dataset created and later deleted.
verify(mockClient, times(2)).datasets();
verify(mockDatasets).insert(anyString(), any(Dataset.class));
verify(mockDatasetsInsert).execute();
verify(mockDatasets).delete(anyString(), anyString());
verify(mockDatasetsDelete).execute();
// Job inserted to run the query, polled once.
verify(mockClient, times(3)).jobs();
verify(mockJobs, times(2)).insert(anyString(), any(Job.class));
verify(mockJobsInsert, times(2)).execute();
verify(mockJobs).get(anyString(), anyString());
verify(mockJobsGet).execute();
// Temp table get after query finish, deleted after reading.
verify(mockClient, times(3)).tables();
verify(mockTables, times(2)).get(anyString(), anyString(), anyString());
verify(mockTablesGet, times(2)).execute();
verify(mockTables).delete(anyString(), anyString(), anyString());
verify(mockTablesDelete).execute();
// Table data read.
verify(mockClient).tabledata();
verify(mockTabledata).list("project", "tempdataset", "temptable");
verify(mockTabledataList).execute();
}
/**
* Verifies that queries that reference no data can be read.
*/
@Test
public void testReadFromQueryNoTables() throws IOException, InterruptedException {
// Mock job inserting.
Job dryRunJob = new Job().setStatistics(
new JobStatistics().setQuery(new JobStatistics2()));
Job insertedJob = new Job().setJobReference(new JobReference());
when(mockJobsInsert.execute()).thenReturn(dryRunJob, insertedJob);
// Mock job polling.
JobStatus status = new JobStatus().setState("DONE");
JobConfigurationQuery resultQueryConfig = new JobConfigurationQuery()
.setDestinationTable(new TableReference()
.setProjectId("project")
.setDatasetId("tempdataset")
.setTableId("temptable"));
Job getJob =
new Job()
.setJobReference(new JobReference())
.setStatus(status)
.setConfiguration(new JobConfiguration().setQuery(resultQueryConfig));
when(mockJobsGet.execute()).thenReturn(getJob);
// Mock table schema fetch.
when(mockTablesGet.execute()).thenReturn(noTableQuerySchema());
byte[] photoBytes = "photograph".getBytes();
String photoBytesEncoded = BaseEncoding.base64().encode(photoBytes);
// Mock table data fetch.
when(mockTabledataList.execute()).thenReturn(
rawDataList(rawRow("Arthur", 42, photoBytesEncoded)));
// Run query and verify
String query = String.format(
"SELECT \"Arthur\" as name, 42 as count, \"%s\" as photo",
photoBytesEncoded);
JobConfigurationQuery queryConfig = new JobConfigurationQuery().setQuery(query);
try (BigQueryTableRowIterator iterator =
BigQueryTableRowIterator.fromQuery(queryConfig, "project", mockClient)) {
iterator.open();
assertTrue(iterator.advance());
TableRow row = iterator.getCurrent();
assertTrue(row.containsKey("name"));
assertTrue(row.containsKey("count"));
assertTrue(row.containsKey("photo"));
assertEquals("Arthur", row.get("name"));
assertEquals(42, row.get("count"));
assertEquals(photoBytesEncoded, row.get("photo"));
assertFalse(iterator.advance());
}
// Temp dataset created and later deleted.
verify(mockClient, times(2)).datasets();
verify(mockDatasets).insert(anyString(), any(Dataset.class));
verify(mockDatasetsInsert).execute();
verify(mockDatasets).delete(anyString(), anyString());
verify(mockDatasetsDelete).execute();
// Job inserted to run the query, polled once.
verify(mockClient, times(3)).jobs();
verify(mockJobs, times(2)).insert(anyString(), any(Job.class));
verify(mockJobsInsert, times(2)).execute();
verify(mockJobs).get(anyString(), anyString());
verify(mockJobsGet).execute();
// Temp table get after query finish, deleted after reading.
verify(mockClient, times(2)).tables();
verify(mockTables, times(1)).get(anyString(), anyString(), anyString());
verify(mockTablesGet, times(1)).execute();
verify(mockTables).delete(anyString(), anyString(), anyString());
verify(mockTablesDelete).execute();
// Table data read.
verify(mockClient).tabledata();
verify(mockTabledata).list("project", "tempdataset", "temptable");
verify(mockTabledataList).execute();
}
/**
* Verifies that when the query fails, the user gets a useful exception and the temporary dataset
* is cleaned up. Also verifies that the temporary table (which is never created) is not
* erroneously attempted to be deleted.
*/
@Test
public void testQueryFailed() throws IOException {
// Job state polled with an error.
String errorReason = "bad query";
Exception exception = new IOException(errorReason);
when(mockJobsInsert.execute()).thenThrow(exception, exception, exception, exception);
JobConfigurationQuery queryConfig = new JobConfigurationQuery().setQuery("NOT A QUERY");
try (BigQueryTableRowIterator iterator =
BigQueryTableRowIterator.fromQuery(queryConfig, "project", mockClient)) {
iterator.open();
fail();
} catch (Exception expected) {
// Verify message explains cause and reports the query.
assertThat(expected.getMessage(), containsString("Error"));
assertThat(expected.getMessage(), containsString("NOT A QUERY"));
assertThat(expected.getCause().getMessage(), containsString(errorReason));
}
// Job inserted to run the query, then polled once.
verify(mockClient, times(1)).jobs();
verify(mockJobs).insert(anyString(), any(Job.class));
verify(mockJobsInsert, times(4)).execute();
}
}