/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.hive.hcatalog.api; import java.util.ArrayList; import java.util.EnumSet; import java.util.HashMap; import java.util.List; import java.util.Map; import com.google.common.collect.Maps; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.metastore.api.SerDeInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat; import org.apache.hadoop.hive.ql.io.RCFileInputFormat; import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcSerde; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler; import org.apache.hadoop.hive.ql.metadata.HiveUtils; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.mapred.SequenceFileInputFormat; import org.apache.hive.hcatalog.common.HCatException; import org.apache.hive.hcatalog.data.schema.HCatFieldSchema; import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * The HCatTable is a wrapper around org.apache.hadoop.hive.metastore.api.Table. */ public class HCatTable { private static final Logger LOG = LoggerFactory.getLogger(HCatTable.class); public static enum Type { MANAGED_TABLE, EXTERNAL_TABLE, VIRTUAL_VIEW, INDEX_TABLE } /** * Attributes that can be compared between HCatTables. */ public static enum TableAttribute { COLUMNS, PARTITION_COLUMNS, INPUT_FORMAT, OUTPUT_FORMAT, SERDE, SERDE_PROPERTIES, STORAGE_HANDLER, LOCATION, TABLE_PROPERTIES, STATS // TODO: Handle replication of changes to Table-STATS. } /** * The default set of attributes that can be diffed between HCatTables. */ public static final EnumSet<TableAttribute> DEFAULT_COMPARISON_ATTRIBUTES = EnumSet.of(TableAttribute.COLUMNS, TableAttribute.INPUT_FORMAT, TableAttribute.OUTPUT_FORMAT, TableAttribute.SERDE, TableAttribute.SERDE_PROPERTIES, TableAttribute.STORAGE_HANDLER, TableAttribute.TABLE_PROPERTIES); /** * 2 HCatTables are considered equivalent if {@code lhs.diff(rhs).equals(NO_DIFF) == true; } */ public static final EnumSet<TableAttribute> NO_DIFF = EnumSet.noneOf(TableAttribute.class); public static final String DEFAULT_SERDE_CLASS = org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName(); public static final String DEFAULT_INPUT_FORMAT_CLASS = org.apache.hadoop.mapred.TextInputFormat.class.getName(); public static final String DEFAULT_OUTPUT_FORMAT_CLASS = org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat.class.getName(); private String dbName = MetaStoreUtils.DEFAULT_DATABASE_NAME; private String tableName; private HiveConf conf; private String tableType; private boolean isExternal; private List<HCatFieldSchema> cols = new ArrayList<HCatFieldSchema>(); private List<HCatFieldSchema> partCols = new ArrayList<HCatFieldSchema>(); private StorageDescriptor sd; private String fileFormat; private Map<String, String> tblProps = new HashMap<String, String>(); private String comment = ""; private String owner; public HCatTable(String dbName, String tableName) { this.dbName = StringUtils.isBlank(dbName)? MetaStoreUtils.DEFAULT_DATABASE_NAME : dbName; this.tableName = tableName; this.sd = new StorageDescriptor(); this.sd.setInputFormat(DEFAULT_INPUT_FORMAT_CLASS); this.sd.setOutputFormat(DEFAULT_OUTPUT_FORMAT_CLASS); this.sd.setSerdeInfo(new SerDeInfo()); this.sd.getSerdeInfo().setSerializationLib(DEFAULT_SERDE_CLASS); this.sd.getSerdeInfo().setParameters(new HashMap<String, String>()); this.sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); // Default serialization format. } HCatTable(Table hiveTable) throws HCatException { tableName = hiveTable.getTableName(); dbName = hiveTable.getDbName(); tableType = hiveTable.getTableType(); isExternal = hiveTable.getTableType().equals(TableType.EXTERNAL_TABLE.toString()); sd = hiveTable.getSd(); for (FieldSchema colFS : sd.getCols()) { cols.add(HCatSchemaUtils.getHCatFieldSchema(colFS)); } partCols = new ArrayList<HCatFieldSchema>(); for (FieldSchema colFS : hiveTable.getPartitionKeys()) { partCols.add(HCatSchemaUtils.getHCatFieldSchema(colFS)); } if (hiveTable.getParameters() != null) { tblProps.putAll(hiveTable.getParameters()); } if (StringUtils.isNotBlank(tblProps.get("comment"))) { comment = tblProps.get("comment"); } owner = hiveTable.getOwner(); } Table toHiveTable() throws HCatException { Table newTable = new Table(); newTable.setDbName(dbName); newTable.setTableName(tableName); if (tblProps != null) { newTable.setParameters(tblProps); } if (isExternal) { newTable.putToParameters("EXTERNAL", "TRUE"); newTable.setTableType(TableType.EXTERNAL_TABLE.toString()); } else { newTable.setTableType(TableType.MANAGED_TABLE.toString()); } if (StringUtils.isNotBlank(this.comment)) { newTable.putToParameters("comment", comment); } newTable.setSd(sd); if (partCols != null) { ArrayList<FieldSchema> hivePtnCols = new ArrayList<FieldSchema>(); for (HCatFieldSchema fs : partCols) { hivePtnCols.add(HCatSchemaUtils.getFieldSchema(fs)); } newTable.setPartitionKeys(hivePtnCols); } newTable.setCreateTime((int) (System.currentTimeMillis() / 1000)); newTable.setLastAccessTimeIsSet(false); try { // TODO: Verify that this works for systems using UGI.doAs() (e.g. Oozie). newTable.setOwner(owner == null? getConf().getUser() : owner); } catch (Exception exception) { throw new HCatException("Unable to determine owner of table (" + dbName + "." + tableName + ") from HiveConf."); } return newTable; } void setConf(Configuration conf) { if (conf instanceof HiveConf) { this.conf = (HiveConf)conf; } else { this.conf = new HiveConf(conf, getClass()); } } HiveConf getConf() { if (conf == null) { LOG.warn("Conf hasn't been set yet. Using defaults."); conf = new HiveConf(); } return conf; } StorageDescriptor getSd() { return sd; } /** * Gets the table name. * * @return the table name */ public String getTableName() { return tableName; } /** * Setter for TableName. */ public HCatTable tableName(String tableName) { this.tableName = tableName; return this; } /** * Gets the db name. * * @return the db name */ public String getDbName() { return dbName; } /** * Setter for db-name. */ public HCatTable dbName(String dbName) { this.dbName = dbName; return this; } /** * Gets the columns. * * @return the columns */ public List<HCatFieldSchema> getCols() { return cols; } /** * Setter for Column schemas. */ public HCatTable cols(List<HCatFieldSchema> cols) { if (!this.cols.equals(cols)) { this.cols.clear(); this.cols.addAll(cols); this.sd.setCols(HCatSchemaUtils.getFieldSchemas(cols)); } return this; } /** * Gets the part columns. * * @return the part columns */ public List<HCatFieldSchema> getPartCols() { return partCols; } /** * Setter for list of partition columns. */ public HCatTable partCols(List<HCatFieldSchema> partCols) { this.partCols = partCols; return this; } /** * Setter for individual partition columns. */ public HCatTable partCol(HCatFieldSchema partCol) { if (this.partCols == null) { this.partCols = new ArrayList<HCatFieldSchema>(); } this.partCols.add(partCol); return this; } /** * Gets the bucket columns. * * @return the bucket columns */ public List<String> getBucketCols() { return this.sd.getBucketCols(); } /** * Setter for list of bucket columns. */ public HCatTable bucketCols(List<String> bucketCols) { this.sd.setBucketCols(bucketCols); return this; } /** * Gets the sort columns. * * @return the sort columns */ public List<Order> getSortCols() { return this.sd.getSortCols(); } /** * Setter for Sort-cols. */ public HCatTable sortCols(List<Order> sortCols) { this.sd.setSortCols(sortCols); return this; } /** * Gets the number of buckets. * * @return the number of buckets */ public int getNumBuckets() { return this.sd.getNumBuckets(); } /** * Setter for number of buckets. */ public HCatTable numBuckets(int numBuckets) { this.sd.setNumBuckets(numBuckets); return this; } /** * Gets the storage handler. * * @return the storage handler */ public String getStorageHandler() { return this.tblProps.get(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE); } /** * Setter for StorageHandler class. */ public HCatTable storageHandler(String storageHandler) throws HCatException { this.tblProps.put( org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE, storageHandler); LOG.warn("HiveStorageHandlers can't be reliably instantiated on the client-side. " + "Attempting to derive Input/OutputFormat settings from StorageHandler, on best effort: "); try { HiveStorageHandler sh = HiveUtils.getStorageHandler(getConf(), storageHandler); this.sd.setInputFormat(sh.getInputFormatClass().getName()); this.sd.setOutputFormat(sh.getOutputFormatClass().getName()); this.sd.getSerdeInfo().setSerializationLib(sh.getSerDeClass().getName()); } catch (HiveException e) { LOG.warn("Could not derive Input/OutputFormat and SerDe settings from storageHandler. " + "These values need to be set explicitly.", e); } return this; } /** * Gets the table props. * * @return the table props */ public Map<String, String> getTblProps() { return tblProps; } /** * Setter for TableProperty map. */ public HCatTable tblProps(Map<String, String> tblProps) { if (!this.tblProps.equals(tblProps)) { this.tblProps.clear(); this.tblProps.putAll(tblProps); } return this; } /** * Gets the tableType. * * @return the tableType */ public String getTabletype() { return tableType; } /** * Setter for table-type. */ public HCatTable tableType(Type tableType) { this.tableType = tableType.name(); this.isExternal = tableType.equals(Type.EXTERNAL_TABLE); return this; } private SerDeInfo getSerDeInfo() { if (!sd.isSetSerdeInfo()) { sd.setSerdeInfo(new SerDeInfo()); } return sd.getSerdeInfo(); } public HCatTable fileFormat(String fileFormat) { this.fileFormat = fileFormat; if (fileFormat.equalsIgnoreCase("sequencefile")) { inputFileFormat(SequenceFileInputFormat.class.getName()); outputFileFormat(HiveSequenceFileOutputFormat.class.getName()); serdeLib(LazySimpleSerDe.class.getName()); } else if (fileFormat.equalsIgnoreCase("rcfile")) { inputFileFormat(RCFileInputFormat.class.getName()); outputFileFormat(RCFileOutputFormat.class.getName()); serdeLib(LazyBinaryColumnarSerDe.class.getName()); } else if (fileFormat.equalsIgnoreCase("orcfile")) { inputFileFormat(OrcInputFormat.class.getName()); outputFileFormat(OrcOutputFormat.class.getName()); serdeLib(OrcSerde.class.getName()); } return this; } public String fileFormat() { return fileFormat; } /** * Gets the input file format. * * @return the input file format */ public String getInputFileFormat() { return sd.getInputFormat(); } /** * Setter for InputFormat class. */ public HCatTable inputFileFormat(String inputFileFormat) { sd.setInputFormat(inputFileFormat); return this; } /** * Gets the output file format. * * @return the output file format */ public String getOutputFileFormat() { return sd.getOutputFormat(); } /** * Setter for OutputFormat class. */ public HCatTable outputFileFormat(String outputFileFormat) { this.sd.setOutputFormat(outputFileFormat); return this; } /** * Gets the serde lib. * * @return the serde lib */ public String getSerdeLib() { return getSerDeInfo().getSerializationLib(); } /** * Setter for SerDe class name. */ public HCatTable serdeLib(String serde) { getSerDeInfo().setSerializationLib(serde); return this; } public HCatTable serdeParams(Map<String, String> serdeParams) { getSerDeInfo().setParameters(serdeParams); return this; } public HCatTable serdeParam(String paramName, String value) { SerDeInfo serdeInfo = getSerDeInfo(); if (serdeInfo.getParameters() == null) { serdeInfo.setParameters(new HashMap<String, String>()); } serdeInfo.getParameters().put(paramName, value); return this; } /** * Returns parameters such as field delimiter,etc. */ public Map<String, String> getSerdeParams() { return getSerDeInfo().getParameters(); } /** * Gets the location. * * @return the location */ public String getLocation() { return sd.getLocation(); } /** * Setter for location. */ public HCatTable location(String location) { this.sd.setLocation(location); return this; } /** * Getter for table-owner. */ public String owner() { return owner; } /** * Setter for table-owner. */ public HCatTable owner(String owner) { this.owner = owner; return this; } public String comment() { return this.comment; } /** * Setter for table-level comment. */ public HCatTable comment(String comment) { this.comment = comment; return this; } /** * See <i>row_format</i> element of CREATE_TABLE DDL for Hive. */ public HCatTable fieldsTerminatedBy(char delimiter) { return serdeParam(serdeConstants.FIELD_DELIM, Character.toString(delimiter)); } /** * See <i>row_format</i> element of CREATE_TABLE DDL for Hive. */ public HCatTable escapeChar(char escapeChar) { return serdeParam(serdeConstants.ESCAPE_CHAR, Character.toString(escapeChar)); } /** * See <i>row_format</i> element of CREATE_TABLE DDL for Hive. */ public HCatTable collectionItemsTerminatedBy(char delimiter) { return serdeParam(serdeConstants.COLLECTION_DELIM, Character.toString(delimiter)); } /** * See <i>row_format</i> element of CREATE_TABLE DDL for Hive. */ public HCatTable mapKeysTerminatedBy(char delimiter) { return serdeParam(serdeConstants.MAPKEY_DELIM, Character.toString(delimiter)); } /** * See <i>row_format</i> element of CREATE_TABLE DDL for Hive. */ public HCatTable linesTerminatedBy(char delimiter) { return serdeParam(serdeConstants.LINE_DELIM, Character.toString(delimiter)); } /** * See <i>row_format</i> element of CREATE_TABLE DDL for Hive. */ public HCatTable nullDefinedAs(char nullChar) { return serdeParam(serdeConstants.SERIALIZATION_NULL_FORMAT, Character.toString(nullChar)); } @Override public String toString() { return "HCatTable [ " + "tableName=" + tableName + ", " + "dbName=" + dbName + ", " + "tableType=" + tableType + ", " + "cols=" + cols + ", " + "partCols=" + partCols + ", " + "bucketCols=" + getBucketCols() + ", " + "numBuckets=" + getNumBuckets() + ", " + "sortCols=" + getSortCols() + ", " + "inputFormat=" + getInputFileFormat() + ", " + "outputFormat=" + getOutputFileFormat() + ", " + "storageHandler=" + getStorageHandler() + ", " + "serde=" + getSerdeLib() + ", " + "tblProps=" + getTblProps() + ", " + "location=" + getLocation() + ", " + "owner=" + owner() + " ]"; } /** * Method to compare the attributes of 2 HCatTable instances. * @param rhs The other table being compared against. Can't be null. * @param attributesToCheck The list of TableAttributes being compared. * @return {@code EnumSet<TableAttribute>} containing all the attribute that differ between {@code this} and rhs. * Subset of {@code attributesToCheck}. */ public EnumSet<TableAttribute> diff(HCatTable rhs, EnumSet<TableAttribute> attributesToCheck) { EnumSet<TableAttribute> theDiff = EnumSet.noneOf(TableAttribute.class); for (TableAttribute attribute : attributesToCheck) { if (attribute.equals(TableAttribute.COLUMNS)) { if (!rhs.getCols().containsAll(getCols()) || !getCols().containsAll(rhs.getCols())) { theDiff.add(TableAttribute.COLUMNS); } } if (attribute.equals(TableAttribute.INPUT_FORMAT)) { if ((getInputFileFormat() == null && rhs.getInputFileFormat() != null) || (getInputFileFormat() != null && (rhs.getInputFileFormat() == null || !rhs.getInputFileFormat().equals(getInputFileFormat())))) { theDiff.add(TableAttribute.INPUT_FORMAT); } } if (attribute.equals(TableAttribute.OUTPUT_FORMAT)) { if ((getOutputFileFormat() == null && rhs.getOutputFileFormat() != null) || (getOutputFileFormat() != null && (rhs.getOutputFileFormat() == null || !rhs.getOutputFileFormat().equals(getOutputFileFormat())))) { theDiff.add(TableAttribute.OUTPUT_FORMAT); } } if (attribute.equals(TableAttribute.STORAGE_HANDLER)) { if ((getStorageHandler() == null && rhs.getStorageHandler() != null) || (getStorageHandler() != null && (rhs.getStorageHandler() == null || !rhs.getStorageHandler().equals(getStorageHandler())))) { theDiff.add(TableAttribute.STORAGE_HANDLER); } } if (attribute.equals(TableAttribute.SERDE)) { if ((getSerdeLib() == null && rhs.getSerdeLib() != null) || (getSerdeLib() != null && (rhs.getSerdeLib() == null || !rhs.getSerdeLib().equals(getSerdeLib())))) { theDiff.add(TableAttribute.SERDE); } } if (attribute.equals(TableAttribute.SERDE_PROPERTIES)) { if (!equivalent(sd.getSerdeInfo().getParameters(), rhs.sd.getSerdeInfo().getParameters())) { theDiff.add(TableAttribute.SERDE_PROPERTIES); } } if (attribute.equals(TableAttribute.TABLE_PROPERTIES)) { if (!equivalent(tblProps, rhs.tblProps)) { theDiff.add(TableAttribute.TABLE_PROPERTIES); } } } return theDiff; } /** * Helper method to compare 2 Map instances, for equivalence. * @param lhs First map to be compared. * @param rhs Second map to be compared. * @return true, if the 2 Maps contain the same entries. */ private static boolean equivalent(Map<String, String> lhs, Map<String, String> rhs) { return lhs.size() == rhs.size() && Maps.difference(lhs, rhs).areEqual(); } /** * Method to compare the attributes of 2 HCatTable instances. * Only the {@code DEFAULT_COMPARISON_ATTRIBUTES} are compared. * @param rhs The other table being compared against. Can't be null. * @return {@code EnumSet<TableAttribute>} containing all the attribute that differ between {@code this} and rhs. * Subset of {@code DEFAULT_COMPARISON_ATTRIBUTES}. */ public EnumSet<TableAttribute> diff (HCatTable rhs) { return diff(rhs, DEFAULT_COMPARISON_ATTRIBUTES); } /** * Method to "adopt" the specified attributes from rhs into this HCatTable object. * @param rhs The "source" table from which attributes are to be copied from. * @param attributes The set of attributes to be copied from rhs. Usually the result of {@code this.diff(rhs)}. * @return This HCatTable * @throws HCatException */ public HCatTable resolve(HCatTable rhs, EnumSet<TableAttribute> attributes) throws HCatException { if (rhs == this) return this; for (TableAttribute attribute : attributes) { if (attribute.equals(TableAttribute.COLUMNS)) { cols(rhs.cols); } if (attribute.equals(TableAttribute.INPUT_FORMAT)) { inputFileFormat(rhs.getInputFileFormat()); } if (attribute.equals(TableAttribute.OUTPUT_FORMAT)) { outputFileFormat(rhs.getOutputFileFormat()); } if (attribute.equals(TableAttribute.SERDE)) { serdeLib(rhs.getSerdeLib()); } if (attribute.equals(TableAttribute.SERDE_PROPERTIES)) { serdeParams(rhs.getSerdeParams()); } if (attribute.equals(TableAttribute.STORAGE_HANDLER)) { storageHandler(rhs.getStorageHandler()); } if (attribute.equals(TableAttribute.TABLE_PROPERTIES)) { tblProps(rhs.tblProps); } } return this; } }