package nl.us2.cloudpelican.stormprocessor;

/**
 * Created by robin on 07/06/15.
 */

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
import com.google.api.client.http.HttpTransport;
import com.google.api.client.json.JsonFactory;
import com.google.api.client.json.jackson2.JacksonFactory;
import com.google.api.client.util.SecurityUtils;
import com.google.api.services.bigquery.Bigquery;
import com.google.api.services.bigquery.model.*;
import org.apache.commons.codec.binary.Base64;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.ByteArrayInputStream;
import java.security.PrivateKey;
import java.util.*;

/**
 * Sink bolt that streams aggregated log lines into Google BigQuery.
 *
 * @author robin
 */
public class BigQuerySinkBolt extends AbstractSinkBolt {

    private static final Logger LOG = LoggerFactory.getLogger(BigQuerySinkBolt.class);

    public String projectId;
    public String datasetId;
    private String serviceAccountId;
    private String pk12KeyBase64;
    private PrivateKey pk12;
    private GoogleCredential googleCredential;
    private HttpTransport httpTransport;
    public Bigquery bigquery;
    private HashMap<String, Boolean> preparedTablesCache;

    private static final String STORAGE_SCOPE = "https://www.googleapis.com/auth/bigquery";
    public static final int TABLE_STRUCTURE_VERSION = 1;
    private static JsonFactory JSON_FACTORY;

    public BigQuerySinkBolt(String sinkId, Settings settings) {
        super(sinkId, settings);
    }

    public boolean isValid() {
        projectId = getSinkVar("project_id").trim();
        datasetId = getSinkVar("dataset_id").trim();
        serviceAccountId = getSinkVar("service_account_id").trim();
        pk12KeyBase64 = getSinkVar("pk12base64").trim();

        // Load the PKCS#12 private key from its base64-encoded sink variable
        // ("notasecret" is the default passphrase Google uses for exported P12 keys)
        try {
            byte[] keyBytes = Base64.decodeBase64(pk12KeyBase64);
            ByteArrayInputStream bis = new ByteArrayInputStream(keyBytes);
            pk12 = SecurityUtils.loadPrivateKeyFromKeyStore(SecurityUtils.getPkcs12KeyStore(), bis, "notasecret", "privatekey", "notasecret");
            LOG.info("Loaded PK12 key");
        } catch (Exception e) {
            LOG.error("Failed to load private key", e);
            return false;
        }

        return !projectId.isEmpty() && !datasetId.isEmpty() && !serviceAccountId.isEmpty();
    }

    public void prepareSink(Map conf, TopologyContext context, OutputCollector collector) {
        isValid(); // Call isValid() to load the private key

        // Transport
        try {
            httpTransport = GoogleNetHttpTransport.newTrustedTransport();
        } catch (Exception e) {
            LOG.error("Failed to init transport", e);
            System.exit(1);
        }

        // JSON
        JSON_FACTORY = JacksonFactory.getDefaultInstance();

        // Build a service account credential.
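        // The credential authenticates via the OAuth2 service-account flow: token
        // requests are signed with the P12 private key loaded in isValid() and scoped
        // to BigQuery, so no interactive login or refresh token is needed.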
        googleCredential = new GoogleCredential.Builder()
                .setTransport(httpTransport)
                .setJsonFactory(JSON_FACTORY)
                .setServiceAccountId(serviceAccountId)
                .setServiceAccountScopes(Collections.singleton(STORAGE_SCOPE))
                .setServiceAccountPrivateKey(pk12)
                .build();

        // BigQuery client
        bigquery = new Bigquery.Builder(httpTransport, JSON_FACTORY, googleCredential)
                .setApplicationName(Main.class.getSimpleName())
                .build();

        // Cache of tables that have already been created/verified
        preparedTablesCache = new HashMap<String, Boolean>();
    }

    protected void _flush() {
        for (Map.Entry<String, ArrayList<String>> kv : resultAggregator.entrySet()) {
            try {
                // Convert the buffered lines to BigQuery insert rows
                ArrayList<TableDataInsertAllRequest.Rows> rows = new ArrayList<TableDataInsertAllRequest.Rows>();
                for (String line : kv.getValue()) {
                    Map<String, Object> rowData = new HashMap<String, Object>();
                    rowData.put("_raw", line);
                    TableDataInsertAllRequest.Rows row = new TableDataInsertAllRequest.Rows().setJson(rowData);
                    rows.add(row);
                }

                // Execute the insert asynchronously
                executor.execute(new BigQueryInsertRunnable(this, kv.getKey(), rows));
            } catch (Exception e) {
                LOG.error("Failed to write data of " + kv.getKey() + " to BigQuery", e);
            }
        }
        resultAggregator.clear();
    }

    protected void prepareTable(String name) {
        // Already created/verified?
        if (preparedTablesCache.containsKey(name)) {
            return;
        }
        LOG.info("Preparing table " + name);

        // Check whether the table exists; executeUsingHead() throws on a 404,
        // so a missing table also ends up in the catch block below
        boolean tableExists = false;
        try {
            bigquery.tables().get(projectId, datasetId, name).executeUsingHead();
            tableExists = true;
        } catch (Exception e) {
            LOG.error("Failed to check table", e);
        }

        if (tableExists) {
            LOG.info("Table exists: " + name);
            preparedTablesCache.put(name, true);
            return;
        }

        // Expiration: keep data for data_retention_hours (default 168 hours = 7 days)
        Date now = new Date();
        Calendar cal = Calendar.getInstance();
        cal.setTime(now);
        cal.add(Calendar.HOUR, Integer.parseInt(getSinkVarOrDefault("data_retention_hours", "168")));
        long expirationTime = cal.getTimeInMillis();
        LOG.info("Expiration set to " + new Date(expirationTime).toString());

        // Table schema
        TableSchema schema = new TableSchema();
        List<TableFieldSchema> tableFieldSchema = new ArrayList<TableFieldSchema>();

        TableFieldSchema schemaEntryRaw = new TableFieldSchema();
        schemaEntryRaw.setName("_raw");
        schemaEntryRaw.setType("STRING");
        tableFieldSchema.add(schemaEntryRaw);

        TableFieldSchema schemaEntryMsg = new TableFieldSchema();
        schemaEntryMsg.setName("message");
        schemaEntryMsg.setType("STRING");
        tableFieldSchema.add(schemaEntryMsg);

        TableFieldSchema schemaEntryMsgType = new TableFieldSchema();
        schemaEntryMsgType.setName("type");
        schemaEntryMsgType.setType("INTEGER");
        tableFieldSchema.add(schemaEntryMsgType);

        TableFieldSchema schemaEntryLabel = new TableFieldSchema();
        schemaEntryLabel.setName("label");
        schemaEntryLabel.setType("STRING");
        tableFieldSchema.add(schemaEntryLabel);

        TableFieldSchema schemaEntryHost = new TableFieldSchema();
        schemaEntryHost.setName("host");
        schemaEntryHost.setType("STRING");
        tableFieldSchema.add(schemaEntryHost);

        TableFieldSchema schemaEntryTime = new TableFieldSchema();
        schemaEntryTime.setName("timestamp");
        schemaEntryTime.setType("TIMESTAMP");
        tableFieldSchema.add(schemaEntryTime);

        schema.setFields(tableFieldSchema);

        // Table definition
        Table table = new Table();
        table.setSchema(schema);
        TableReference tableRef = new TableReference();
        tableRef.setDatasetId(datasetId);
        tableRef.setProjectId(projectId);
        tableRef.setTableId(name);
        table.setExpirationTime(expirationTime);
        table.setTableReference(tableRef);

        // Create table
        try {
            bigquery.tables().insert(projectId, datasetId, table).execute();
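            // Note: if another worker created the table in the meantime, the insert()
            // above fails with HTTP 409 ("duplicate") and we fall through to the catch
            // block; the next prepareTable() call will then see the table exists.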
LOG.info("Created table " + name); preparedTablesCache.put(name, true); } catch (Exception e) { LOG.error("Failed to create table", e); } } }