/* * Copyright © 2014 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package co.cask.cdap.explore.service; import co.cask.cdap.proto.ColumnDesc; import co.cask.cdap.proto.Id; import co.cask.cdap.proto.QueryHandle; import co.cask.cdap.proto.QueryInfo; import co.cask.cdap.proto.QueryResult; import co.cask.cdap.proto.QueryStatus; import co.cask.cdap.proto.TableInfo; import co.cask.cdap.proto.TableNameInfo; import java.sql.DatabaseMetaData; import java.sql.SQLException; import java.util.List; import javax.annotation.Nullable; /** * Interface for exploring datasets. */ public interface Explore { /** * Execute a Hive SQL statement asynchronously. The returned {@link QueryHandle} can be used to get the * status/result of the operation. * * @param namespace namespace to run the query in. * @param statement SQL statement. * @return {@link QueryHandle} representing the operation. * @throws ExploreException on any error executing statement. * @throws SQLException if there are errors in the SQL statement. */ QueryHandle execute(Id.Namespace namespace, String statement) throws ExploreException, SQLException; /** * Fetch the status of a running Hive operation. * * @param handle handle returned by {@link #execute(Id.Namespace, String)}. * @return status of the operation. * @throws ExploreException on any error fetching status. * @throws HandleNotFoundException when handle is not found. * @throws SQLException if there are errors in the SQL statement. */ QueryStatus getStatus(QueryHandle handle) throws ExploreException, HandleNotFoundException, SQLException; /** * Fetch the schema of the result of a Hive operation. This can be called only after the state of the operation is * {@link QueryStatus.OpStatus#FINISHED}. * * @param handle handle returned by {@link #execute(Id.Namespace, String)}. * @return list of {@link ColumnDesc} representing the schema of the results. Empty list if there are no results. * @throws ExploreException on any error fetching schema. * @throws HandleNotFoundException when handle is not found. * @throws SQLException if there are errors in the SQL statement. */ List<ColumnDesc> getResultSchema(QueryHandle handle) throws ExploreException, HandleNotFoundException, SQLException; /** * Fetch the results of a Hive operation. This can be called only after the state of the operation is * {@link QueryStatus.OpStatus#FINISHED}. Can be called multiple times, until it returns an empty list * indicating the end of results. * * @param handle handle returned by {@link #execute(Id.Namespace, String)}. * @param size max rows to fetch in the call. * @return list of {@link QueryResult}s. * @throws ExploreException on any error fetching results. * @throws HandleNotFoundException when handle is not found. * @throws SQLException if there are errors in the SQL statement. */ List<QueryResult> nextResults(QueryHandle handle, int size) throws ExploreException, HandleNotFoundException, SQLException; /** * Fetch a preview of the results of a Hive operation. This can be called only after the state of the operation is * {@link QueryStatus.OpStatus#FINISHED}. Two subsequent calls to this methods will return the same list of results. * * @param handle handle returned by {@link #execute(Id.Namespace, String)}. * @return preview list of {@link QueryResult}s. * @throws ExploreException on any error fetching a preview of the results. * @throws HandleNotFoundException when handle is not found. * @throws SQLException if there are errors in the SQL statement. */ List<QueryResult> previewResults(QueryHandle handle) throws ExploreException, HandleNotFoundException, SQLException; /** * Release resources associated with a Hive operation. After this call, handle of the operation becomes invalid. * * @param handle handle returned by {@link #execute(Id.Namespace, String)}. * @throws ExploreException on any error closing operation. * @throws HandleNotFoundException when handle is not found. */ void close(QueryHandle handle) throws ExploreException, HandleNotFoundException; /** * Fetch information about queries executed in Hive. * * @return List of {@link QueryInfo} * @throws ExploreException * @param namespace namespace to get queries in. */ List<QueryInfo> getQueries(Id.Namespace namespace) throws ExploreException, SQLException; /** * Get the number of active queries being executed in Hive, specifically * queries that are being executed or completed queries that have not yet been closed. * * @param namespace namespace to count queries in. * @return the number of live queries being executed. * @throws ExploreException */ int getActiveQueryCount(Id.Namespace namespace) throws ExploreException; ////// Metadata methods /** * Retrieves a description of table columns available in the specified catalog. * Only column descriptions matching the catalog, schema, table and column name criteria are returned. * * See {@link DatabaseMetaData#getColumns(String, String, String, String)}. * * @param catalog a catalog name; must match the catalog name as it is stored in the database; * "" retrieves those without a catalog; * null means that the catalog name should not be used to narrow the search. * @param schemaPattern a schema name pattern; must match the schema name as it is stored in the database; * "" retrieves those without a schema; * null means that the schema name should not be used to narrow the search. * @param tableNamePattern a table name pattern; must match the table name as it is stored in the database. * @param columnNamePattern a column name pattern; must match the column name as it is stored in the database. * @return {@link QueryHandle} representing the operation. * @throws ExploreException on any error getting the columns. * @throws SQLException if there are errors in the SQL statement. */ QueryHandle getColumns(@Nullable String catalog, @Nullable String schemaPattern, String tableNamePattern, String columnNamePattern) throws ExploreException, SQLException; /** * Retrieves the catalog names available in this database. * * @return {@link QueryHandle} representing the operation. * @throws ExploreException on any error getting the columns. * @throws SQLException if there are errors in the SQL statement. */ QueryHandle getCatalogs() throws ExploreException, SQLException; /** * Retrieves the schema names available in this database. * * See {@link DatabaseMetaData#getSchemas(String, String)}. * * @param catalog a catalog name; must match the catalog name as it is stored in the database; * "" retrieves those without a catalog; * null means that the catalog name should not be used to narrow the search. * @param schemaPattern a schema name pattern; must match the schema name as it is stored in the database; * "" retrieves those without a schema; * null means that the schema name should not be used to narrow the search. * @return {@link QueryHandle} representing the operation. * @throws ExploreException on any error getting the schemas. * @throws SQLException if there are errors in the SQL statement. */ QueryHandle getSchemas(@Nullable String catalog, @Nullable String schemaPattern) throws ExploreException, SQLException; /** * Retrieves a description of the system and user functions available in the given catalog. * Only system and user function descriptions matching the schema and function name criteria are returned. * * See {@link DatabaseMetaData#getFunctions(String, String, String)}. * * @param catalog a catalog name; must match the catalog name as it is stored in the database; * "" retrieves those without a catalog; * null means that the catalog name should not be used to narrow the search. * @param schemaPattern a schema name pattern; must match the schema name as it is stored in the database; * "" retrieves those without a schema; * null means that the schema name should not be used to narrow the search. * @param functionNamePattern a function name pattern; must match the function name as it is stored in the database * @return {@link QueryHandle} representing the operation. * @throws ExploreException on any error getting the functions. * @throws SQLException if there are errors in the SQL statement. */ QueryHandle getFunctions(@Nullable String catalog, @Nullable String schemaPattern, String functionNamePattern) throws ExploreException, SQLException; /** * Get information about CDAP as a database. * * @param infoType information type we are interested about. * @return {@link QueryHandle} representing the operation. * @throws ExploreException on any error getting the information. * @throws SQLException if there are errors in the SQL statement. */ MetaDataInfo getInfo(MetaDataInfo.InfoType infoType) throws ExploreException, SQLException; /** * Retrieves a description of the tables available in the given catalog. Only table descriptions * matching the catalog, schema, table name and type criteria are returned. * * See {@link DatabaseMetaData#getTables(String, String, String, String[])}. * * @param catalog a catalog name; must match the catalog name as it is stored in the database; * "" retrieves those without a catalog; * null means that the catalog name should not be used to narrow the search. * @param schemaPattern a schema name pattern; must match the schema name as it is stored in the database; * "" retrieves those without a schema; * null means that the schema name should not be used to narrow the search. * @param tableNamePattern a table name pattern; must match the table name as it is stored in the database. * @param tableTypes a list of table types, which must come from * "TABLE", "VIEW", "SYSTEM TABLE", "GLOBAL TEMPORARY", "LOCAL TEMPORARY", "ALIAS", "SYNONYM"; * null returns all types. * @return {@link QueryHandle} representing the operation. * @throws ExploreException on any error getting the tables. * @throws SQLException if there are errors in the SQL statement. */ QueryHandle getTables(@Nullable String catalog, @Nullable String schemaPattern, String tableNamePattern, @Nullable List<String> tableTypes) throws ExploreException, SQLException; /** * Retrieve a list of all the tables present in Hive Metastore that match the given database name. * * @param database database name from which to list the tables. The database has to be accessible by the current * user. If it is null, all the databases the user has access to will be inspected. * @return list of table names present in the database. * @throws ExploreException on any error getting the tables. */ List<TableNameInfo> getTables(@Nullable String database) throws ExploreException; /** * Get information about a Hive table. * * @param database name of the database the table belongs to. * @param table table name for which to get the schema. * @return information about a table. * @throws ExploreException on any error getting the tables. */ TableInfo getTableInfo(@Nullable String database, String table) throws ExploreException, TableNotFoundException; /** * Retrieves the table types available in this database. * * See {@link DatabaseMetaData#getTableTypes()}. * * @return {@link QueryHandle} representing the operation. * @throws ExploreException on any error getting the table types. * @throws SQLException if there are errors in the SQL statement. */ QueryHandle getTableTypes() throws ExploreException, SQLException; /** * Retrieves a description of all the data types supported by this database. * * See {@link DatabaseMetaData#getTypeInfo()}. * * @return {@link QueryHandle} representing the operation. * @throws ExploreException on any error getting the types info. * @throws SQLException if there are errors in the SQL statement. */ QueryHandle getTypeInfo() throws ExploreException, SQLException; /** * Creates a new namespace in Explore. * * @param namespace namespace to create. * @return {@link QueryHandle} representing the operation. * @throws ExploreException on any errors creating the namespace. * @throws SQLException if there are errors in the SQL statement. */ QueryHandle createNamespace(Id.Namespace namespace) throws ExploreException, SQLException; /** * Deletes a new namespace in Explore. * * @param namespace namespace to delete. * @return {@link QueryHandle} representing the operation. * @throws ExploreException on any errors deleting the namespace. * @throws SQLException if there are errors in the SQL statement. */ QueryHandle deleteNamespace(Id.Namespace namespace) throws ExploreException, SQLException; /** * Upgrades Explore if needed. This method must be implemented in an idempotent way. * * @throws Exception if there was an exception during the upgrade */ void upgrade() throws Exception; }