/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.hbase;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hive.metastore.HiveMetaHook;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.Constants;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer;
import org.apache.hadoop.hive.ql.index.IndexSearchCondition;
import org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler;
import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.util.StringUtils;

/**
 * HBaseStorageHandler provides a HiveStorageHandler implementation for
 * HBase.
 */
public class HBaseStorageHandler extends DefaultStorageHandler
  implements HiveMetaHook, HiveStoragePredicateHandler {

  private HBaseConfiguration hbaseConf;
  private HBaseAdmin admin;

  private HBaseAdmin getHBaseAdmin() throws MetaException {
    try {
      // lazily instantiate the admin client on first use
      if (admin == null) {
        admin = new HBaseAdmin(hbaseConf);
      }
      return admin;
    } catch (MasterNotRunningException mnre) {
      throw new MetaException(StringUtils.stringifyException(mnre));
    }
  }
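  /**
   * Resolves the name of the underlying HBase table for a Hive table. As an
   * illustrative sketch (the table and column names here are assumed, not
   * defined by this class), given:
   *
   * <pre>
   * CREATE TABLE hbase_t(key int, value string)
   * STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
   * WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf1:val")
   * TBLPROPERTIES ("hbase.table.name" = "xyz");
   * </pre>
   *
   * this method returns "xyz"; without the TBLPROPERTIES entry it falls back
   * to SERDEPROPERTIES, and finally to the Hive table name "hbase_t".
   */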
  private String getHBaseTableName(Table tbl) {
    // Give preference to TBLPROPERTIES over SERDEPROPERTIES
    // (really we should only use TBLPROPERTIES, so this is just
    // for backwards compatibility with the original specs).
    String tableName = tbl.getParameters().get(HBaseSerDe.HBASE_TABLE_NAME);
    if (tableName == null) {
      tableName = tbl.getSd().getSerdeInfo().getParameters().get(
        HBaseSerDe.HBASE_TABLE_NAME);
    }
    if (tableName == null) {
      tableName = tbl.getTableName();
    }
    return tableName;
  }

  @Override
  public void preDropTable(Table table) throws MetaException {
    // nothing to do
  }

  @Override
  public void rollbackDropTable(Table table) throws MetaException {
    // nothing to do
  }

  @Override
  public void commitDropTable(
    Table tbl, boolean deleteData) throws MetaException {

    try {
      String tableName = getHBaseTableName(tbl);
      boolean isExternal = MetaStoreUtils.isExternalTable(tbl);
      if (deleteData && !isExternal) {
        if (getHBaseAdmin().isTableEnabled(tableName)) {
          getHBaseAdmin().disableTable(tableName);
        }
        getHBaseAdmin().deleteTable(tableName);
      }
    } catch (IOException ie) {
      throw new MetaException(StringUtils.stringifyException(ie));
    }
  }
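  /**
   * Creates (or, for external tables, verifies) the underlying HBase table
   * before the Hive table is created in the metastore. A sketch of the
   * mapping logic, reusing the illustrative "hbase.columns.mapping" value
   * ":key,cf1:val" from above: ":key" denotes the row key, whose offset is
   * the iKey returned by parseColumnMapping and is skipped when families are
   * created or checked, so the only column family required (or created) in
   * HBase is "cf1".
   */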
  @Override
  public void preCreateTable(Table tbl) throws MetaException {
    boolean isExternal = MetaStoreUtils.isExternalTable(tbl);

    // We'd like to move this to HiveMetaStore for any non-native table, but
    // first we need to support storing NULL for location on a table
    if (tbl.getSd().getLocation() != null) {
      throw new MetaException("LOCATION may not be specified for HBase.");
    }

    try {
      String tableName = getHBaseTableName(tbl);
      Map<String, String> serdeParam =
        tbl.getSd().getSerdeInfo().getParameters();
      String hbaseColumnsMapping =
        serdeParam.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);

      if (hbaseColumnsMapping == null) {
        throw new MetaException("No hbase.columns.mapping defined in Serde.");
      }

      List<String> hbaseColumnFamilies = new ArrayList<String>();
      List<String> hbaseColumnQualifiers = new ArrayList<String>();
      List<byte []> hbaseColumnFamiliesBytes = new ArrayList<byte []>();
      List<byte []> hbaseColumnQualifiersBytes = new ArrayList<byte []>();
      int iKey = HBaseSerDe.parseColumnMapping(hbaseColumnsMapping,
          hbaseColumnFamilies, hbaseColumnFamiliesBytes,
          hbaseColumnQualifiers, hbaseColumnQualifiersBytes);

      HTableDescriptor tableDesc;

      if (!getHBaseAdmin().tableExists(tableName)) {
        // if it is not an external table then create one
        if (!isExternal) {
          // Create the column descriptors
          tableDesc = new HTableDescriptor(tableName);
          Set<String> uniqueColumnFamilies =
            new HashSet<String>(hbaseColumnFamilies);
          uniqueColumnFamilies.remove(hbaseColumnFamilies.get(iKey));

          for (String columnFamily : uniqueColumnFamilies) {
            tableDesc.addFamily(
              new HColumnDescriptor(Bytes.toBytes(columnFamily)));
          }

          getHBaseAdmin().createTable(tableDesc);
        } else {
          // an external table
          throw new MetaException("HBase table " + tableName +
            " doesn't exist while the table is declared as an external table.");
        }
      } else {
        if (!isExternal) {
          throw new MetaException("Table " + tableName + " already exists"
            + " within HBase; use CREATE EXTERNAL TABLE instead to"
            + " register it in Hive.");
        }

        // make sure the schema mapping is right
        tableDesc =
          getHBaseAdmin().getTableDescriptor(Bytes.toBytes(tableName));

        for (int i = 0; i < hbaseColumnFamilies.size(); i++) {
          if (i == iKey) {
            continue;
          }

          if (!tableDesc.hasFamily(hbaseColumnFamiliesBytes.get(i))) {
            throw new MetaException("Column Family " + hbaseColumnFamilies.get(i)
              + " is not defined in hbase table " + tableName);
          }
        }
      }

      // ensure the table is online
      new HTable(hbaseConf, tableDesc.getName());
    } catch (MasterNotRunningException mnre) {
      throw new MetaException(StringUtils.stringifyException(mnre));
    } catch (IOException ie) {
      throw new MetaException(StringUtils.stringifyException(ie));
    } catch (SerDeException se) {
      throw new MetaException(StringUtils.stringifyException(se));
    }
  }

  @Override
  public void rollbackCreateTable(Table table) throws MetaException {
    boolean isExternal = MetaStoreUtils.isExternalTable(table);
    String tableName = getHBaseTableName(table);
    try {
      if (!isExternal && getHBaseAdmin().tableExists(tableName)) {
        // we have created an HBase table, so we delete it to roll back
        if (getHBaseAdmin().isTableEnabled(tableName)) {
          getHBaseAdmin().disableTable(tableName);
        }
        getHBaseAdmin().deleteTable(tableName);
      }
    } catch (IOException ie) {
      throw new MetaException(StringUtils.stringifyException(ie));
    }
  }

  @Override
  public void commitCreateTable(Table table) throws MetaException {
    // nothing to do
  }

  @Override
  public Configuration getConf() {
    return hbaseConf;
  }

  @Override
  public void setConf(Configuration conf) {
    hbaseConf = new HBaseConfiguration(conf);
  }

  @Override
  public Class<? extends InputFormat> getInputFormatClass() {
    return HiveHBaseTableInputFormat.class;
  }

  @Override
  public Class<? extends OutputFormat> getOutputFormatClass() {
    return HiveHBaseTableOutputFormat.class;
  }

  @Override
  public Class<? extends SerDe> getSerDeClass() {
    return HBaseSerDe.class;
  }

  @Override
  public HiveMetaHook getMetaHook() {
    return this;
  }

  @Override
  public void configureTableJobProperties(
    TableDesc tableDesc,
    Map<String, String> jobProperties) {

    Properties tableProperties = tableDesc.getProperties();

    jobProperties.put(
      HBaseSerDe.HBASE_COLUMNS_MAPPING,
      tableProperties.getProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING));

    String tableName =
      tableProperties.getProperty(HBaseSerDe.HBASE_TABLE_NAME);
    if (tableName == null) {
      tableName = tableProperties.getProperty(Constants.META_TABLE_NAME);
    }
    jobProperties.put(HBaseSerDe.HBASE_TABLE_NAME, tableName);
  }

  @Override
  public DecomposedPredicate decomposePredicate(
    JobConf jobConf,
    Deserializer deserializer,
    ExprNodeDesc predicate) {

    String columnNameProperty = jobConf.get(
      org.apache.hadoop.hive.serde.Constants.LIST_COLUMNS);
    List<String> columnNames = Arrays.asList(columnNameProperty.split(","));

    HBaseSerDe hbaseSerde = (HBaseSerDe) deserializer;
    IndexPredicateAnalyzer analyzer =
      HiveHBaseTableInputFormat.newIndexPredicateAnalyzer(
        columnNames.get(hbaseSerde.getKeyColumnOffset()));
    List<IndexSearchCondition> searchConditions =
      new ArrayList<IndexSearchCondition>();
    ExprNodeDesc residualPredicate =
      analyzer.analyzePredicate(predicate, searchConditions);

    if (searchConditions.size() != 1) {
      // Either there was nothing which could be pushed down (size = 0),
      // or more than one predicate (size > 1); in the latter case,
      // we bail out for now since multiple lookups on the key are
      // either contradictory or redundant.  We'll need to handle
      // this better later when we support more interesting predicates.
      return null;
    }

    DecomposedPredicate decomposedPredicate = new DecomposedPredicate();
    decomposedPredicate.pushedPredicate =
      analyzer.translateSearchConditions(searchConditions);
    decomposedPredicate.residualPredicate = residualPredicate;
    return decomposedPredicate;
  }
}
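// A sketch of the predicate pushdown above, using the illustrative schema
// from the getHBaseTableName() Javadoc (where "key" is the row-key column):
// for
//   WHERE key = 'x' AND value = 'y'
// the analyzer finds exactly one search condition on the key, so key = 'x'
// becomes the pushed predicate and value = 'y' the residual; for
//   WHERE key = 'a' AND key = 'b'
// two key conditions are found and decomposePredicate() returns null,
// leaving the entire filter for Hive to evaluate.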