package org.apache.hadoop.hive.cassandra;
import java.util.Map;
import java.util.Properties;
import org.apache.cassandra.thrift.KsDef;
import org.apache.cassandra.thrift.NotFoundException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.cassandra.input.HiveCassandraStandardColumnInputFormat;
import org.apache.hadoop.hive.cassandra.output.HiveCassandraOutputFormat;
import org.apache.hadoop.hive.cassandra.serde.AbstractColumnSerDe;
import org.apache.hadoop.hive.cassandra.serde.CassandraColumnSerDe;
import org.apache.hadoop.hive.metastore.HiveMetaHook;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.Constants;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.OutputFormat;
public class CassandraStorageHandler implements HiveStorageHandler, HiveMetaHook {
private Configuration configuration;
@Override
public void configureTableJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
Properties tableProperties = tableDesc.getProperties();
// Try parsing the keyspace.columnFamily
String tableName = tableProperties.getProperty(Constants.META_TABLE_NAME);
String dbName = tableProperties.getProperty(Constants.META_TABLE_DB);
String keyspace = tableProperties.getProperty(AbstractColumnSerDe.CASSANDRA_KEYSPACE_NAME);
String columnFamily = tableProperties.getProperty(AbstractColumnSerDe.CASSANDRA_CF_NAME);
//Identify Keyspace
if (keyspace == null) {
keyspace = dbName;
}
jobProperties.put(AbstractColumnSerDe.CASSANDRA_KEYSPACE_NAME, keyspace);
//Identify ColumnFamily
if (columnFamily == null) {
columnFamily = tableName;
}
jobProperties.put(AbstractColumnSerDe.CASSANDRA_CF_NAME, columnFamily);
//If no column mapping has been configured, we should create the default column mapping.
String columnInfo = tableProperties.getProperty(AbstractColumnSerDe.CASSANDRA_COL_MAPPING);
if(columnInfo == null) {
columnInfo = AbstractColumnSerDe.createColumnMappingString(
tableProperties.getProperty(org.apache.hadoop.hive.serde.Constants.LIST_COLUMNS));
}
jobProperties.put(AbstractColumnSerDe.CASSANDRA_COL_MAPPING, columnInfo);
if (configuration.get(AbstractColumnSerDe.CASSANDRA_HOST) == null)
{
jobProperties.put(AbstractColumnSerDe.CASSANDRA_HOST,
tableProperties.getProperty(AbstractColumnSerDe.CASSANDRA_HOST, AbstractColumnSerDe.DEFAULT_CASSANDRA_HOST));
}
else
{
jobProperties.put(AbstractColumnSerDe.CASSANDRA_HOST, configuration.get(AbstractColumnSerDe.CASSANDRA_HOST));
}
if (configuration.get(AbstractColumnSerDe.CASSANDRA_PORT) == null)
{
jobProperties.put(AbstractColumnSerDe.CASSANDRA_PORT,
tableProperties.getProperty(AbstractColumnSerDe.CASSANDRA_PORT, AbstractColumnSerDe.DEFAULT_CASSANDRA_PORT));
}
else
{
jobProperties.put(AbstractColumnSerDe.CASSANDRA_PORT,configuration.get(AbstractColumnSerDe.CASSANDRA_PORT));
}
if (configuration.get(AbstractColumnSerDe.CASSANDRA_PARTITIONER) == null)
{
jobProperties.put(AbstractColumnSerDe.CASSANDRA_PARTITIONER,
tableProperties.getProperty(AbstractColumnSerDe.CASSANDRA_PARTITIONER,
"org.apache.cassandra.dht.RandomPartitioner"));
}
else
{
jobProperties.put(AbstractColumnSerDe.CASSANDRA_PARTITIONER,configuration.get(AbstractColumnSerDe.CASSANDRA_PARTITIONER));
}
if (configuration.get(AbstractColumnSerDe.CASSANDRA_CONSISTENCY_LEVEL) == null)
{
jobProperties.put(AbstractColumnSerDe.CASSANDRA_CONSISTENCY_LEVEL,
tableProperties.getProperty(AbstractColumnSerDe.CASSANDRA_CONSISTENCY_LEVEL,
AbstractColumnSerDe.DEFAULT_CONSISTENCY_LEVEL));
}
else
{
jobProperties.put(AbstractColumnSerDe.CASSANDRA_CONSISTENCY_LEVEL,configuration.get(AbstractColumnSerDe.CASSANDRA_CONSISTENCY_LEVEL));
}
if (configuration.get(AbstractColumnSerDe.CASSANDRA_RANGE_BATCH_SIZE) == null)
{
jobProperties.put(AbstractColumnSerDe.CASSANDRA_RANGE_BATCH_SIZE,
tableProperties.getProperty(AbstractColumnSerDe.CASSANDRA_RANGE_BATCH_SIZE,
Integer.toString(AbstractColumnSerDe.DEFAULT_RANGE_BATCH_SIZE)));
}
else
{
jobProperties.put(AbstractColumnSerDe.CASSANDRA_RANGE_BATCH_SIZE, configuration.get(AbstractColumnSerDe.CASSANDRA_RANGE_BATCH_SIZE));
}
if (configuration.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_SIZE) == null)
{
jobProperties.put(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_SIZE,
tableProperties.getProperty(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_SIZE,
Integer.toString(AbstractColumnSerDe.DEFAULT_SLICE_PREDICATE_SIZE)));
}
else
{
jobProperties.put(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_SIZE, configuration.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_SIZE));
}
if (configuration.get(AbstractColumnSerDe.CASSANDRA_SPLIT_SIZE) == null)
{
jobProperties.put(AbstractColumnSerDe.CASSANDRA_SPLIT_SIZE,
tableProperties.getProperty(AbstractColumnSerDe.CASSANDRA_SPLIT_SIZE,
Integer.toString(AbstractColumnSerDe.DEFAULT_SPLIT_SIZE)));
}
else
{
jobProperties.put(AbstractColumnSerDe.CASSANDRA_SPLIT_SIZE, configuration.get(AbstractColumnSerDe.CASSANDRA_SPLIT_SIZE));
}
if (configuration.get(AbstractColumnSerDe.CASSANDRA_BATCH_MUTATION_SIZE) == null)
{
jobProperties.put(AbstractColumnSerDe.CASSANDRA_BATCH_MUTATION_SIZE,
tableProperties.getProperty(AbstractColumnSerDe.CASSANDRA_BATCH_MUTATION_SIZE,
Integer.toString(AbstractColumnSerDe.DEFAULT_BATCH_MUTATION_SIZE)));
}
else
{
jobProperties.put(AbstractColumnSerDe.CASSANDRA_BATCH_MUTATION_SIZE, configuration.get(AbstractColumnSerDe.CASSANDRA_BATCH_MUTATION_SIZE));
}
if (configuration.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_START) == null)
{
jobProperties.put(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_START,
tableProperties.getProperty(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_START, ""));
}
else
{
jobProperties.put(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_START, configuration.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_START));
}
if (configuration.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_FINISH) == null)
{
jobProperties.put(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_FINISH,
tableProperties.getProperty(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_FINISH, ""));
}
else
{
jobProperties.put(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_FINISH, configuration.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_FINISH));
}
if (configuration.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_COMPARATOR) == null)
{
jobProperties.put(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_COMPARATOR,
tableProperties.getProperty(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_COMPARATOR, ""));
}
else
{
jobProperties.put(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_COMPARATOR,
configuration.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_COMPARATOR));
}
if (configuration.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_REVERSED) == null)
{
jobProperties.put(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_REVERSED,
tableProperties.getProperty(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_REVERSED, "false"));
}
else
{
jobProperties.put(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_REVERSED,
configuration.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_REVERSED));
}
}
@Override
public Class<? extends InputFormat> getInputFormatClass() {
return HiveCassandraStandardColumnInputFormat.class;
}
@Override
public HiveMetaHook getMetaHook() {
return this;
}
public HiveAuthorizationProvider getAuthorizationProvider() throws HiveException {
return null;
}
public void configureInputJobProperties(TableDesc inTableDesc, Map<String, String> inStringStringMap) {
configureTableJobProperties(inTableDesc, inStringStringMap);
}
public void configureOutputJobProperties(TableDesc inTableDesc, Map<String, String> inStringStringMap) {
configureTableJobProperties(inTableDesc, inStringStringMap);
}
@Override
public Class<? extends OutputFormat> getOutputFormatClass() {
return HiveCassandraOutputFormat.class;
}
@Override
public Class<? extends SerDe> getSerDeClass() {
return CassandraColumnSerDe.class;
}
@Override
public Configuration getConf() {
return this.configuration;
}
@Override
public void setConf(Configuration arg0) {
this.configuration = arg0;
}
@Override
public void preCreateTable(Table table) throws MetaException {
boolean isExternal = MetaStoreUtils.isExternalTable(table);
if (!isExternal) {
throw new MetaException("Cassandra tables must be external.");
}
if (table.getSd().getLocation() != null) {
throw new MetaException("LOCATION may not be specified for Cassandra.");
}
CassandraManager manager = new CassandraManager(table);
try {
//open connection to cassandra
manager.openConnection();
KsDef ks = manager.getKeyspaceDesc();
//create the column family if it doesn't exist.
manager.createCFIfNotFound(ks);
} catch(NotFoundException e) {
manager.createKeyspaceWithColumns();
} finally {
manager.closeConnection();
}
}
@Override
public void commitCreateTable(Table table) throws MetaException {
// No work needed
}
@Override
public void commitDropTable(Table table, boolean deleteData) throws MetaException {
//TODO: Should this be implemented to drop the table and its data from cassandra
boolean isExternal = MetaStoreUtils.isExternalTable(table);
if (deleteData && !isExternal) {
CassandraManager manager = new CassandraManager(table);
try {
//open connection to cassandra
manager.openConnection();
//drop the table
manager.dropTable();
} finally {
manager.closeConnection();
}
}
}
@Override
public void preDropTable(Table table) throws MetaException {
// nothing to do
}
@Override
public void rollbackCreateTable(Table table) throws MetaException {
// No work needed
}
@Override
public void rollbackDropTable(Table table) throws MetaException {
// nothing to do
}
}