/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.ql.index; import java.util.List; import java.util.Set; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.Index; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; /** * HiveIndexHandler defines a pluggable interface for adding new index handlers * to Hive. */ public interface HiveIndexHandler extends Configurable { /** * Determines whether this handler implements indexes by creating an index * table. * * @return true if index creation implies creation of an index table in Hive; * false if the index representation is not stored in a Hive table */ boolean usesIndexTable(); /** * Requests that the handler validate an index definition and fill in * additional information about its stored representation. * * @param baseTable * the definition of the table being indexed * * @param index * the definition of the index being created * * @param indexTable * a partial definition of the index table to be used for storing the * index representation, or null if usesIndexTable() returns false; * the handler can augment the index's storage descriptor (e.g. with * information about input/output format) and/or the index table's * definition (typically with additional columns containing the index * representation, e.g. pointers into HDFS). * * @throws HiveException if the index definition is invalid with respect to * either the base table or the supplied index table definition */ void analyzeIndexDefinition( org.apache.hadoop.hive.metastore.api.Table baseTable, org.apache.hadoop.hive.metastore.api.Index index, org.apache.hadoop.hive.metastore.api.Table indexTable) throws HiveException; /** * Requests that the handler generate a plan for building the index; the plan * should read the base table and write out the index representation. * * @param baseTbl * the definition of the table being indexed * * @param index * the definition of the index * * @param baseTblPartitions * list of base table partitions with each element mirrors to the * corresponding one in indexTblPartitions * * @param indexTbl * the definition of the index table, or null if usesIndexTable() * returns null * * @param inputs * inputs for hooks, supplemental outputs going * along with the return value * * @param outputs * outputs for hooks, supplemental outputs going * along with the return value * * @return list of tasks to be executed in parallel for building the index * * @throws HiveException if plan generation fails */ List<Task<?>> generateIndexBuildTaskList( org.apache.hadoop.hive.ql.metadata.Table baseTbl, org.apache.hadoop.hive.metastore.api.Index index, List<Partition> indexTblPartitions, List<Partition> baseTblPartitions, org.apache.hadoop.hive.ql.metadata.Table indexTbl, Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws HiveException; /** * Generate the list of tasks required to run an index optimized sub-query for the * given predicate, using the given indexes. If multiple indexes are * provided, it is up to the handler whether to use none, one, some or all of * them. The supplied predicate may reference any of the columns from any of * the indexes. If the handler decides to use more than one index, it is * responsible for generating tasks to combine their search results * (e.g. performing a JOIN on the result). * @param indexes * @param predicate * @param pctx * @param queryContext contains results, such as query tasks and input configuration */ void generateIndexQuery(List<Index> indexes, ExprNodeDesc predicate, ParseContext pctx, HiveIndexQueryContext queryContext); /** * Check the size of an input query to make sure it fits within the bounds * * @param inputSize size (in bytes) of the query in question * @param conf * @return true if query is within the bounds */ boolean checkQuerySize(long inputSize, HiveConf conf); }