/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.beam.sdk.io.gcp.bigquery;
import static com.google.common.base.Preconditions.checkNotNull;
import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createJobIdToken;
import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.createTempTableReference;
import com.google.api.services.bigquery.model.Job;
import com.google.api.services.bigquery.model.JobConfigurationQuery;
import com.google.api.services.bigquery.model.JobReference;
import com.google.api.services.bigquery.model.JobStatistics;
import com.google.api.services.bigquery.model.TableReference;
import com.google.api.services.bigquery.model.TableRow;
import com.google.common.annotations.VisibleForTesting;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.util.List;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.Status;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.JobService;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.ValueProvider;
import org.apache.beam.sdk.transforms.display.DisplayData;
/**
 * A {@link BigQuerySourceBase} for querying BigQuery tables.
 *
 * <p>The query is dry-run once per (deserialized) instance to obtain its {@link JobStatistics};
 * those statistics back both {@link #getEstimatedSizeBytes} and the lookup of referenced tables in
 * {@link #getTableToExtract}. Extraction runs the query into a temporary table whose reference is
 * derived from the pipeline job name and the step UUID, and {@link #cleanupTempResource} removes
 * that table and its dataset again.
 */
@VisibleForTesting
class BigQueryQuerySource extends BigQuerySourceBase {

  /**
   * Creates a {@link BigQueryQuerySource}.
   *
   * @param stepUuid unique id of the step, forwarded to the base class and used for job id tokens
   * @param query the query to run; must not be null
   * @param flattenResults whether query results are flattened; must not be null
   * @param useLegacySql whether the query uses legacy SQL; must not be null
   * @param bqServices the BigQuery service facade, forwarded to the base class
   */
  static BigQueryQuerySource create(
      String stepUuid,
      ValueProvider<String> query,
      Boolean flattenResults,
      Boolean useLegacySql,
      BigQueryServices bqServices) {
    return new BigQueryQuerySource(stepUuid, query, flattenResults, useLegacySql, bqServices);
  }

  private final ValueProvider<String> query;
  private final Boolean flattenResults;
  private final Boolean useLegacySql;

  // Cache for the dry-run statistics. Transient, so it is rebuilt (empty) in readObject.
  private transient AtomicReference<JobStatistics> dryRunJobStats;

  private BigQueryQuerySource(
      String stepUuid,
      ValueProvider<String> query,
      Boolean flattenResults,
      Boolean useLegacySql,
      BigQueryServices bqServices) {
    super(stepUuid, bqServices);
    this.query = checkNotNull(query, "query");
    this.flattenResults = checkNotNull(flattenResults, "flattenResults");
    this.useLegacySql = checkNotNull(useLegacySql, "useLegacySql");
    this.dryRunJobStats = new AtomicReference<>();
  }

  /**
   * Returns the total bytes processed as reported by a dry run of the query.
   *
   * @throws Exception if the dry run fails
   */
  @Override
  public long getEstimatedSizeBytes(PipelineOptions options) throws Exception {
    BigQueryOptions bqOptions = options.as(BigQueryOptions.class);
    JobStatistics stats = dryRunQueryIfNeeded(bqOptions);
    return stats.getTotalBytesProcessed();
  }

  /**
   * Creates a reader that obtains rows directly from the query, executed in the project
   * configured in {@code options}.
   */
  @Override
  public BoundedReader<TableRow> createReader(PipelineOptions options) throws IOException {
    BigQueryOptions bqOptions = options.as(BigQueryOptions.class);
    return new BigQueryReader(
        this,
        bqServices.getReaderFromQuery(
            bqOptions, bqOptions.getProject(), createBasicQueryConfig()));
  }

  /**
   * Runs the query into a temporary table and returns a reference to that table.
   *
   * <p>The temporary dataset is created in the location of the first table referenced by the
   * query; if the dry run reports no referenced tables, a {@code null} location is passed on to
   * the dataset service.
   *
   * @throws IOException if a service call fails or the query job does not succeed
   * @throws InterruptedException if interrupted while waiting on the BigQuery services
   */
  @Override
  protected TableReference getTableToExtract(BigQueryOptions bqOptions)
      throws IOException, InterruptedException {
    // 1. Find the location of the query.
    List<TableReference> referencedTables =
        dryRunQueryIfNeeded(bqOptions).getQuery().getReferencedTables();
    DatasetService tableService = bqServices.getDatasetService(bqOptions);
    String location = null;
    if (referencedTables != null && !referencedTables.isEmpty()) {
      location = tableService.getTable(referencedTables.get(0)).getLocation();
    }

    // 2. Create the temporary dataset in the query location.
    TableReference tableToExtract = tempTableReference(bqOptions);
    tableService.createDataset(
        tableToExtract.getProjectId(),
        tableToExtract.getDatasetId(),
        location,
        "Dataset for BigQuery query job temporary table");

    // 3. Execute the query.
    String queryJobId = createJobIdToken(bqOptions.getJobName(), stepUuid) + "-query";
    executeQuery(
        bqOptions.getProject(),
        queryJobId,
        tableToExtract,
        bqServices.getJobService(bqOptions));
    return tableToExtract;
  }

  /**
   * Deletes the temporary table written by {@link #getTableToExtract} and then its dataset.
   *
   * @throws Exception if either delete call fails
   */
  @Override
  protected void cleanupTempResource(BigQueryOptions bqOptions) throws Exception {
    TableReference tableToRemove = tempTableReference(bqOptions);
    DatasetService tableService = bqServices.getDatasetService(bqOptions);
    tableService.deleteTable(tableToRemove);
    tableService.deleteDataset(tableToRemove.getProjectId(), tableToRemove.getDatasetId());
  }

  /** Adds the query to the display data registered by the base class. */
  @Override
  public void populateDisplayData(DisplayData.Builder builder) {
    super.populateDisplayData(builder);
    builder.add(DisplayData.item("query", query));
  }

  /**
   * Builds the reference to the temporary table for this step.
   *
   * <p>Shared by {@link #getTableToExtract} and {@link #cleanupTempResource} so that the table
   * that gets cleaned up is always derived the same way as the table that was created.
   */
  private TableReference tempTableReference(BigQueryOptions bqOptions) {
    return createTempTableReference(
        bqOptions.getProject(), createJobIdToken(bqOptions.getJobName(), stepUuid));
  }

  /**
   * Returns the dry-run statistics of the query, issuing the dry run only if no result is cached.
   *
   * <p>The method is {@code synchronized}, so the check and the update of the cache happen under
   * the instance monitor.
   */
  private synchronized JobStatistics dryRunQueryIfNeeded(BigQueryOptions bqOptions)
      throws InterruptedException, IOException {
    JobStatistics stats = dryRunJobStats.get();
    if (stats == null) {
      stats = bqServices.getJobService(bqOptions).dryRunQuery(
          bqOptions.getProject(), createBasicQueryConfig());
      dryRunJobStats.set(stats);
    }
    return stats;
  }

  /**
   * Starts the query job writing into {@code destinationTable} and waits for it to finish.
   *
   * @param executingProject project in which the query job runs
   * @param jobId id to assign to the query job
   * @param destinationTable table that receives the query result
   * @param jobService service used to start and poll the job
   * @throws IOException if polling yields no job or the job did not succeed
   * @throws InterruptedException if interrupted while starting or polling the job
   */
  private void executeQuery(
      String executingProject,
      String jobId,
      TableReference destinationTable,
      JobService jobService) throws IOException, InterruptedException {
    JobReference jobRef = new JobReference()
        .setProjectId(executingProject)
        .setJobId(jobId);

    // Large results into a fresh destination table, run at batch priority; WRITE_EMPTY makes the
    // job refuse to write into a table that already holds data.
    JobConfigurationQuery queryConfig = createBasicQueryConfig()
        .setAllowLargeResults(true)
        .setCreateDisposition("CREATE_IF_NEEDED")
        .setDestinationTable(destinationTable)
        .setPriority("BATCH")
        .setWriteDisposition("WRITE_EMPTY");

    jobService.startQueryJob(jobRef, queryConfig);
    Job job = jobService.pollJob(jobRef, JOB_POLL_MAX_RETRIES);

    // NOTE(review): pollJob is bounded by JOB_POLL_MAX_RETRIES and is expected to return null
    // when it gives up - confirm against the JobService contract. Guard here so that case surfaces
    // as the declared IOException instead of a NullPointerException on job.getStatus() below.
    if (job == null) {
      throw new IOException(String.format(
          "Query job %s failed, status: unknown (polling returned no job).", jobId));
    }
    if (BigQueryHelpers.parseStatus(job) != Status.SUCCEEDED) {
      throw new IOException(String.format(
          "Query job %s failed, status: %s.", jobId,
          BigQueryHelpers.statusToPrettyString(job.getStatus())));
    }
  }

  /** Builds the query configuration shared by the dry run, the reader and the real query job. */
  private JobConfigurationQuery createBasicQueryConfig() {
    return new JobConfigurationQuery()
        .setFlattenResults(flattenResults)
        .setQuery(query.get())
        .setUseLegacySql(useLegacySql);
  }

  /**
   * Restores the instance after default deserialization and re-creates the transient dry-run
   * cache, which would otherwise be left {@code null}.
   */
  private void readObject(ObjectInputStream in) throws ClassNotFoundException, IOException {
    in.defaultReadObject();
    dryRunJobStats = new AtomicReference<>();
  }
}