/*
* This file is a component of thundr, a software library from 3wks.
* Read more: http://www.3wks.com.au/thundr
* Copyright (C) 2013 3wks, <thundr@3wks.com.au>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.threewks.thundr.bigquery;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.TimeUnit;
import jodd.util.StringUtil;
import com.google.api.client.http.AbstractInputStreamContent;
import com.google.api.client.http.ByteArrayContent;
import com.google.api.services.bigquery.Bigquery;
import com.google.api.services.bigquery.model.Job;
import com.google.api.services.bigquery.model.JobConfiguration;
import com.google.api.services.bigquery.model.JobConfigurationLoad;
import com.google.api.services.bigquery.model.TableReference;
import com.google.appengine.api.taskqueue.Queue;
import com.google.appengine.api.taskqueue.TaskHandle;
import com.google.appengine.api.taskqueue.TaskOptions;
import com.google.appengine.api.taskqueue.TaskOptions.Method;
import com.google.common.base.Function;
import com.google.common.collect.Maps;
import com.threewks.thundr.exception.BaseException;
import com.threewks.thundr.logger.Logger;
/**
 * {@link BigQueryPushService} implementation that buffers events as tasks on a pull queue and
 * later loads them into BigQuery in bulk.
 * <p>
 * {@link #trackEvent(Object)} converts an event to a CSV row using the {@link EventConverter}
 * registered for the event's exact class and enqueues it. {@link #processTasks()} leases a batch
 * of queued rows, groups them by destination table and submits one load job per table.
 */
public class BigQueryPushServiceImpl implements BigQueryPushService {
    /** Media type declared for the uploaded CSV payload. */
    private static final String CSV_CONTENT_TYPE = "application/octet-stream";
    /** Charset used to encode the CSV payload; pinned so it never depends on the platform default. */
    private static final String CSV_CHARSET = "UTF-8";
    /** Name of the task parameter holding the destination table id. */
    private static final String TABLE_ID_PARAM = "tableId";
    /** Name of the task parameter holding the CSV row. */
    private static final String DATA_PARAM = "data";
    /** How long, in minutes, a batch of tasks is leased for while it is being loaded. */
    private static final long LEASE_MINUTES = 10;
    /** Maximum number of tasks leased per call to {@link #processTasks()}. */
    private static final long LEASE_BATCH_SIZE = 1000;

    private final Queue bigQueryQueue;
    private final String projectId;
    private final String datasetId;
    private final Bigquery bigQuery;
    private final Map<Class<?>, EventConverter<?>> converters = new HashMap<Class<?>, EventConverter<?>>();

    /**
     * @param bigQuery the {@link Bigquery} client used to submit load jobs.
     * @param bigQueryQueue the queue that buffers events until they are loaded.
     * @param bigQueryProjectId the BigQuery project id.
     * @param bigQueryDatasetId the BigQuery dataset id that contains the destination tables.
     */
    public BigQueryPushServiceImpl(Bigquery bigQuery, Queue bigQueryQueue, String bigQueryProjectId, String bigQueryDatasetId) {
        this.bigQuery = bigQuery;
        this.bigQueryQueue = bigQueryQueue;
        this.projectId = bigQueryProjectId;
        this.datasetId = bigQueryDatasetId;
    }

    /**
     * Load the given CSV data into a table of the configured project and dataset.
     *
     * @param tableId the BigQuery table id.
     * @param csvData the CSV data to load into the table.
     * @throws IOException if the load job could not be submitted.
     */
    private void updateTable(String tableId, String csvData) throws IOException {
        executeLoad(bigQuery, projectId, datasetId, tableId, csvData);
    }

    /**
     * Register a converter, replacing any converter previously registered for the same event class.
     *
     * @param converter the converter to use for events of {@code converter.getEventClass()}.
     */
    @Override
    public <T> void registerEventConverter(EventConverter<T> converter) {
        converters.put(converter.getEventClass(), converter);
    }

    /**
     * Convert the event to a CSV row and queue it for a later load into BigQuery.
     * The converter is looked up by the event's exact runtime class.
     *
     * @param event the event to track.
     * @throws BaseException if no converter is registered for the event's class.
     */
    @SuppressWarnings("unchecked")
    @Override
    public <T> void trackEvent(T event) {
        // Safe as long as converters are only added through registerEventConverter, which keys
        // each converter by the event class it reports.
        EventConverter<T> converter = (EventConverter<T>) converters.get(event.getClass());
        if (converter == null) {
            throw new BaseException("No converter registered for class %s", event.getClass());
        }
        queueEvent(converter.getTableId(), converter.getCsvData(event));
    }

    /**
     * Add a pull task carrying one CSV row and its destination table to the queue.
     *
     * @param tableId the id of the table the row belongs to.
     * @param csvData the CSV row.
     */
    private void queueEvent(String tableId, String csvData) {
        TaskOptions task = TaskOptions.Builder.withMethod(Method.PULL);
        task.param(TABLE_ID_PARAM, tableId);
        task.param(DATA_PARAM, csvData);
        bigQueryQueue.add(task);
    }

    /**
     * Lease a batch of queued rows, load them into BigQuery grouped by table, and delete the
     * tasks that have been handled.
     * <p>
     * Tasks are deleted table by table, immediately after that table's load has been submitted,
     * so a failure on a later table does not cause already loaded rows to be loaded again when
     * the remaining tasks are re-leased. Tasks that carry no data or no table id can never be
     * loaded and are removed from the queue without being sent to BigQuery.
     *
     * @return the number of tasks removed from the queue.
     * @throws IOException if a load job could not be submitted; tasks of tables not yet loaded
     *         stay on the queue.
     */
    @Override
    public int processTasks() throws IOException {
        Logger.debug("Processing tasks from queue %s\n", bigQueryQueue.getQueueName());
        List<TaskHandle> tasks = bigQueryQueue.leaseTasks(LEASE_MINUTES, TimeUnit.MINUTES, LEASE_BATCH_SIZE);
        if (tasks.isEmpty()) {
            return 0;
        }

        // Insertion-ordered so tables are loaded in the order they were first seen in the batch.
        Map<String, StringBuilder> csvByTable = new LinkedHashMap<String, StringBuilder>();
        Map<String, List<TaskHandle>> tasksByTable = new LinkedHashMap<String, List<TaskHandle>>();
        List<TaskHandle> unloadable = new ArrayList<TaskHandle>();

        for (TaskHandle task : tasks) {
            String tableId = getParam(task, TABLE_ID_PARAM);
            String data = getParam(task, DATA_PARAM);
            if (!StringUtil.isNotBlank(data)) {
                // nothing to load for this task
                unloadable.add(task);
                continue;
            }
            if (!StringUtil.isNotBlank(tableId)) {
                // Without a destination the row cannot be loaded; keeping the task would fail
                // the whole batch on every run.
                Logger.warn("Discarding task %s from queue %s: no table id", task.getName(), bigQueryQueue.getQueueName());
                unloadable.add(task);
                continue;
            }
            StringBuilder csv = csvByTable.get(tableId);
            if (csv == null) {
                csv = new StringBuilder();
                csvByTable.put(tableId, csv);
                tasksByTable.put(tableId, new ArrayList<TaskHandle>());
            }
            csv.append(data).append('\n');
            tasksByTable.get(tableId).add(task);
        }

        int count = 0;
        if (!unloadable.isEmpty()) {
            bigQueryQueue.deleteTask(unloadable);
            count += unloadable.size();
        }
        for (Entry<String, StringBuilder> entry : csvByTable.entrySet()) {
            String tableId = entry.getKey();
            updateTable(tableId, entry.getValue().toString());
            List<TaskHandle> loaded = tasksByTable.get(tableId);
            bigQueryQueue.deleteTask(loaded);
            count += loaded.size();
        }
        return count;
    }

    /**
     * Execute a 'load' API call to load CSV data into a BigQuery table.
     * The data is uploaded UTF-8 encoded and quoted newlines are allowed inside values.
     *
     * @param bigQuery the {@link Bigquery} object to access the BigQuery API.
     * @param projectId the BigQuery project id.
     * @param datasetId the BigQuery dataset id.
     * @param tableId the BigQuery table id.
     * @param csvData the CSV data to load into the table.
     * @throws IOException if something goes wrong running the job.
     */
    private void executeLoad(Bigquery bigQuery, String projectId, String datasetId, String tableId, String csvData) throws IOException {
        TableReference tableReference = new TableReference().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId);
        JobConfigurationLoad loadConfig = new JobConfigurationLoad().setDestinationTable(tableReference).setAllowQuotedNewlines(true);
        JobConfiguration config = new JobConfiguration().setLoad(loadConfig);
        Job job = new Job().setConfiguration(config);
        // Encode explicitly: the no-arg getBytes() uses the JVM's default charset, which varies
        // between environments and can corrupt non-ASCII values.
        AbstractInputStreamContent content = new ByteArrayContent(CSV_CONTENT_TYPE, csvData.getBytes(CSV_CHARSET));
        bigQuery.jobs().insert(projectId, job, content).execute();
    }

    /**
     * Get a parameter from a task.
     *
     * @param task the task to get the parameter from.
     * @param name the name of the parameter to get.
     * @return the value of the first parameter with the given name, or null if there is no such
     *         parameter or the task's parameters could not be extracted.
     */
    private String getParam(TaskHandle task, String name) {
        try {
            List<Entry<String, String>> params = task.extractParams();
            for (Entry<String, String> param : params) {
                if (param.getKey().equals(name)) {
                    return param.getValue();
                }
            }
        } catch (Exception e) {
            // Best effort: a task whose payload cannot be read is treated as not having the
            // parameter, but the failure is logged so it does not go unnoticed.
            Logger.warn("Failed to extract parameter %s from task %s: %s", name, task.getName(), e.getMessage());
        }
        return null;
    }
}