/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.metadata;

import java.util.Map;

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.hive.metastore.HiveMetaHook;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputFormat;

/**
 * HiveStorageHandler defines a pluggable interface for adding
 * new storage handlers to Hive. A storage handler consists of
 * a bundle of the following:
 *
 * <ul>
 * <li>input format
 * <li>output format
 * <li>serde
 * <li>metadata hooks for keeping an external catalog in sync
 *     with Hive's metastore
 * <li>rules for setting up the configuration properties on
 *     map/reduce jobs which access tables stored by this handler
 * </ul>
 *
 * Storage handler classes are plugged in using the STORED BY 'classname'
 * clause in CREATE TABLE.
 */
public interface HiveStorageHandler extends Configurable {

  /**
   * @return Class providing an implementation of {@link InputFormat}
   */
  Class<? extends InputFormat> getInputFormatClass();

  /**
   * @return Class providing an implementation of {@link OutputFormat}
   */
  Class<? extends OutputFormat> getOutputFormatClass();

  /**
   * @return Class providing an implementation of {@link AbstractSerDe}
   */
  Class<? extends AbstractSerDe> getSerDeClass();

  /**
   * @return metadata hook implementation, or null if this
   * storage handler does not need any metadata notifications
   */
  HiveMetaHook getMetaHook();

  /**
   * Returns the implementation specific authorization provider.
   *
   * @return authorization provider
   * @throws HiveException if the provider cannot be created
   */
  HiveAuthorizationProvider getAuthorizationProvider() throws HiveException;

  /**
   * This method is called to allow the StorageHandlers the chance
   * to populate the JobContext.getConfiguration() with properties that
   * may be needed by the handler's bundled artifacts (i.e. InputFormat, SerDe, etc).
   * Key value pairs passed into jobProperties are guaranteed to be set in the job's
   * configuration object. Users can retrieve "context" information from tableDesc.
   * Users should avoid mutating tableDesc and only make changes in jobProperties.
   * This method is expected to be idempotent such that a job called with the
   * same tableDesc values should return the same key-value pairs in jobProperties.
   * Any external state set by this method should remain the same if this method is
   * called again. It is up to the user to determine how best to guarantee this invariant.
   *
   * This method in particular is to create a configuration for input.
   *
   * @param tableDesc descriptor for the table being accessed
   * @param jobProperties receives properties copied or transformed
   * from the table properties
   */
  void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties);

  /**
   * This method is called to allow the StorageHandlers the chance to
   * populate secret keys into the job's credentials.
   *
   * @param tableDesc descriptor for the table being accessed
   * @param secrets receives the secret key/value pairs to place in the
   * job's credentials
   */
  void configureInputJobCredentials(TableDesc tableDesc, Map<String, String> secrets);

  /**
   * This method is called to allow the StorageHandlers the chance
   * to populate the JobContext.getConfiguration() with properties that
   * may be needed by the handler's bundled artifacts (i.e. InputFormat, SerDe, etc).
   * Key value pairs passed into jobProperties are guaranteed to be set in the job's
   * configuration object. Users can retrieve "context" information from tableDesc.
   * Users should avoid mutating tableDesc and only make changes in jobProperties.
   * This method is expected to be idempotent such that a job called with the
   * same tableDesc values should return the same key-value pairs in jobProperties.
   * Any external state set by this method should remain the same if this method is
   * called again. It is up to the user to determine how best to guarantee this invariant.
   *
   * This method in particular is to create a configuration for output.
   *
   * @param tableDesc descriptor for the table being accessed
   * @param jobProperties receives properties copied or transformed
   * from the table properties
   */
  void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties);

  /**
   * Configures properties for a job based on the definition of the
   * source or target table it accesses.
   *
   * @deprecated use {@link #configureInputJobProperties} /
   * {@link #configureOutputJobProperties} instead
   *
   * @param tableDesc descriptor for the table being accessed
   * @param jobProperties receives properties copied or transformed
   * from the table properties
   */
  @Deprecated
  void configureTableJobProperties(TableDesc tableDesc, Map<String, String> jobProperties);

  /**
   * Called just before submitting MapReduce job.
   *
   * @param tableDesc descriptor for the table being accessed
   * @param jobConf jobConf for MapReduce job
   */
  void configureJobConf(TableDesc tableDesc, JobConf jobConf);
}