/* * Copyright 2016 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.zeppelin.bigquery; import static org.apache.commons.lang.StringUtils.containsIgnoreCase; import com.google.api.client.http.HttpTransport; import com.google.api.client.http.javanet.NetHttpTransport; import com.google.api.client.json.JsonFactory; import com.google.api.client.googleapis.auth.oauth2.GoogleCredential; import com.google.api.client.json.jackson2.JacksonFactory; import com.google.api.services.bigquery.Bigquery; import com.google.api.services.bigquery.BigqueryScopes; import com.google.api.client.json.GenericJson; import com.google.api.services.bigquery.Bigquery.Datasets; import com.google.api.services.bigquery.BigqueryRequest; import com.google.api.services.bigquery.model.DatasetList; import com.google.api.services.bigquery.model.Job; import com.google.api.services.bigquery.model.TableCell; import com.google.api.services.bigquery.model.TableFieldSchema; import com.google.api.services.bigquery.model.TableRow; import com.google.api.services.bigquery.model.TableSchema; import com.google.api.services.bigquery.Bigquery.Jobs.GetQueryResults; import com.google.api.services.bigquery.model.GetQueryResultsResponse; import com.google.api.services.bigquery.model.QueryRequest; import com.google.api.services.bigquery.model.QueryResponse; import com.google.api.services.bigquery.model.JobCancelResponse; import com.google.gson.Gson; import java.io.IOException; import java.util.Collection; import java.sql.ResultSet; import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.util.Properties; import java.util.Set; import org.apache.zeppelin.interpreter.Interpreter; import org.apache.zeppelin.interpreter.InterpreterContext; import org.apache.zeppelin.interpreter.InterpreterPropertyBuilder; import org.apache.zeppelin.interpreter.InterpreterResult; import org.apache.zeppelin.interpreter.InterpreterResult.Code; import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion; import org.apache.zeppelin.scheduler.Scheduler; import org.apache.zeppelin.scheduler.SchedulerFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.base.Function; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.google.common.collect.Sets.SetView; import java.io.PrintStream; import java.io.Reader; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.NoSuchElementException; /** * BigQuery interpreter for Zeppelin. * * <ul> * <li>{@code zeppelin.bigquery.project_id} - Project ID in GCP</li> * <li>{@code zeppelin.bigquery.wait_time} - Query Timeout in ms</li> * <li>{@code zeppelin.bigquery.max_no_of_rows} - Max Result size</li> * </ul> * * <p> * How to use: <br/> * {@code %bigquery.sql<br/> * {@code * SELECT departure_airport,count(case when departure_delay>0 then 1 else 0 end) as no_of_delays * FROM [bigquery-samples:airline_ontime_data.flights] * group by departure_airport * order by 2 desc * limit 10 * } * </p> * */ public class BigQueryInterpreter extends Interpreter { private Logger logger = LoggerFactory.getLogger(BigQueryInterpreter.class); private static final char NEWLINE = '\n'; private static final char TAB = '\t'; private static Bigquery service = null; //Mutex created to create the singleton in thread-safe fashion. private static Object serviceLock = new Object(); static final String PROJECT_ID = "zeppelin.bigquery.project_id"; static final String WAIT_TIME = "zeppelin.bigquery.wait_time"; static final String MAX_ROWS = "zeppelin.bigquery.max_no_of_rows"; private static String jobId = null; private static String projectId = null; private static final List NO_COMPLETION = new ArrayList<>(); private Exception exceptionOnConnect; private static final Function<CharSequence, String> sequenceToStringTransformer = new Function<CharSequence, String>() { public String apply(CharSequence seq) { return seq.toString(); } }; public BigQueryInterpreter(Properties property) { super(property); } //Function to return valid BigQuery Service @Override public void open() { if (service == null) { synchronized (serviceLock) { if (service == null) { try { service = createAuthorizedClient(); exceptionOnConnect = null; logger.info("Opened BigQuery SQL Connection"); } catch (IOException e) { logger.error("Cannot open connection", e); exceptionOnConnect = e; close(); } } } } } //Function that Creates an authorized client to Google Bigquery. private static Bigquery createAuthorizedClient() throws IOException { HttpTransport transport = new NetHttpTransport(); JsonFactory jsonFactory = new JacksonFactory(); GoogleCredential credential = GoogleCredential.getApplicationDefault(transport, jsonFactory); if (credential.createScopedRequired()) { Collection<String> bigqueryScopes = BigqueryScopes.all(); credential = credential.createScoped(bigqueryScopes); } return new Bigquery.Builder(transport, jsonFactory, credential) .setApplicationName("Zeppelin/1.0 (GPN:Apache Zeppelin;)").build(); } //Function that generates and returns the schema and the rows as string public static String printRows(final GetQueryResultsResponse response) { StringBuilder msg = null; msg = new StringBuilder(); try { for (TableFieldSchema schem: response.getSchema().getFields()) { msg.append(schem.getName()); msg.append(TAB); } msg.append(NEWLINE); for (TableRow row : response.getRows()) { for (TableCell field : row.getF()) { msg.append(field.getV().toString()); msg.append(TAB); } msg.append(NEWLINE); } return msg.toString(); } catch ( NullPointerException ex ) { throw new NullPointerException("SQL Execution returned an error!"); } } //Function to poll a job for completion. Future use public static Job pollJob(final Bigquery.Jobs.Get request, final long interval) throws IOException, InterruptedException { Job job = request.execute(); while (!job.getStatus().getState().equals("DONE")) { System.out.println("Job is " + job.getStatus().getState() + " waiting " + interval + " milliseconds..."); Thread.sleep(interval); job = request.execute(); } return job; } //Function to page through the results of an arbitrary bigQuery request public static <T extends GenericJson> Iterator<T> getPages( final BigqueryRequest<T> requestTemplate) { class PageIterator implements Iterator<T> { private BigqueryRequest<T> request; private boolean hasNext = true; public PageIterator(final BigqueryRequest<T> requestTemplate) { this.request = requestTemplate; } public boolean hasNext() { return hasNext; } public T next() { if (!hasNext) { throw new NoSuchElementException(); } try { T response = request.execute(); if (response.containsKey("pageToken")) { request = request.set("pageToken", response.get("pageToken")); } else { hasNext = false; } return response; } catch (IOException e) { return null; } } public void remove() { this.next(); } } return new PageIterator(requestTemplate); } //Function to call bigQuery to run SQL and return results to the Interpreter for output private InterpreterResult executeSql(String sql) { int counter = 0; StringBuilder finalmessage = null; finalmessage = new StringBuilder("%table "); String projId = getProperty(PROJECT_ID); long wTime = Long.parseLong(getProperty(WAIT_TIME)); long maxRows = Long.parseLong(getProperty(MAX_ROWS)); Iterator<GetQueryResultsResponse> pages; try { pages = run(sql, projId, wTime, maxRows); } catch ( IOException ex ) { logger.error(ex.getMessage()); return new InterpreterResult(Code.ERROR, ex.getMessage()); } try { while (pages.hasNext()) { finalmessage.append(printRows(pages.next())); } return new InterpreterResult(Code.SUCCESS, finalmessage.toString()); } catch ( NullPointerException ex ) { return new InterpreterResult(Code.ERROR, ex.getMessage()); } } //Function to run the SQL on bigQuery service public static Iterator<GetQueryResultsResponse> run(final String queryString, final String projId, final long wTime, final long maxRows) throws IOException { try { QueryResponse query = service.jobs().query( projId, new QueryRequest().setTimeoutMs(wTime).setQuery(queryString).setMaxResults(maxRows)) .execute(); jobId = query.getJobReference().getJobId(); projectId = query.getJobReference().getProjectId(); GetQueryResults getRequest = service.jobs().getQueryResults( projectId, jobId); return getPages(getRequest); } catch (IOException ex) { throw ex; } } @Override public void close() { logger.info("Close bqsql connection!"); service = null; } @Override public InterpreterResult interpret(String sql, InterpreterContext contextInterpreter) { logger.info("Run SQL command '{}'", sql); return executeSql(sql); } @Override public Scheduler getScheduler() { return SchedulerFactory.singleton().createOrGetFIFOScheduler( BigQueryInterpreter.class.getName() + this.hashCode()); } @Override public FormType getFormType() { return FormType.SIMPLE; } @Override public int getProgress(InterpreterContext context) { return 0; } @Override public void cancel(InterpreterContext context) { logger.info("Trying to Cancel current query statement."); if (service != null && jobId != null && projectId != null) { try { Bigquery.Jobs.Cancel request = service.jobs().cancel(projectId, jobId); JobCancelResponse response = request.execute(); jobId = null; logger.info("Query Execution cancelled"); } catch (IOException ex) { logger.error("Could not cancel the SQL execution"); } } else { logger.info("Query Execution was already cancelled"); } } @Override public List<InterpreterCompletion> completion(String buf, int cursor, InterpreterContext interpreterContext) { return NO_COMPLETION; } }