/**
 * Copyright 2013 Cloudera Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.cloudera.cdk.data.hbase;

import com.cloudera.cdk.data.DatasetDescriptor;
import com.cloudera.cdk.data.DatasetException;
import com.cloudera.cdk.data.MetadataProviderException;
import com.cloudera.cdk.data.PartitionStrategy;
import com.cloudera.cdk.data.hbase.avro.AvroEntitySchema;
import com.cloudera.cdk.data.hbase.avro.AvroKeyEntitySchemaParser;
import com.cloudera.cdk.data.hbase.impl.Constants;
import com.cloudera.cdk.data.hbase.impl.EntitySchema;
import com.cloudera.cdk.data.hbase.impl.SchemaManager;
import com.cloudera.cdk.data.spi.AbstractMetadataProvider;

import java.io.IOException;
import java.util.Collection;
import java.util.Set;

import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A metadata provider that stores dataset schemas as managed schemas in HBase
 * and keeps the backing HBase table's column families in sync with the entity
 * schema.
 */
public class HBaseMetadataProvider extends AbstractMetadataProvider {

  private static final Logger logger = LoggerFactory
      .getLogger(HBaseMetadataProvider.class);

  private HBaseAdmin hbaseAdmin;
  private SchemaManager schemaManager;

  public HBaseMetadataProvider(HBaseAdmin hbaseAdmin,
      SchemaManager schemaManager) {
    this.hbaseAdmin = hbaseAdmin;
    this.schemaManager = schemaManager;
  }

  @Override
  public DatasetDescriptor create(String name, DatasetDescriptor descriptor) {
    // Bootstrap the table that stores managed schemas if it doesn't exist yet.
    try {
      String managedSchemaName = "managed_schemas"; // TODO: allow table to be specified
      if (!hbaseAdmin.tableExists(managedSchemaName)) {
        HTableDescriptor table = new HTableDescriptor(managedSchemaName);
        table.addFamily(new HColumnDescriptor("meta"));
        table.addFamily(new HColumnDescriptor("schema"));
        table.addFamily(new HColumnDescriptor("_s"));
        hbaseAdmin.createTable(table);
      }
    } catch (IOException e) {
      throw new DatasetException(e);
    }

    String entitySchemaString = descriptor.getSchema().toString(true);
    AvroKeyEntitySchemaParser parser = new AvroKeyEntitySchemaParser();
    AvroEntitySchema entitySchema = parser.parseEntitySchema(entitySchemaString);

    String tableName = getTableName(name);
    String entityName = getEntityName(name);

    // Register the entity schema with the schema manager.
    schemaManager.refreshManagedSchemaCache(tableName, entityName);
    schemaManager.createSchema(tableName, entityName, entitySchemaString,
        "com.cloudera.cdk.data.hbase.avro.AvroKeyEntitySchemaParser",
        "com.cloudera.cdk.data.hbase.avro.AvroKeySerDe",
        "com.cloudera.cdk.data.hbase.avro.AvroEntitySerDe");

    // Create the entity table, or add any column families it is missing.
    try {
      if (!hbaseAdmin.tableExists(tableName)) {
        HTableDescriptor desc = new HTableDescriptor(tableName);
        desc.addFamily(new HColumnDescriptor(Constants.SYS_COL_FAMILY));
        desc.addFamily(new HColumnDescriptor(Constants.OBSERVABLE_COL_FAMILY));
        for (String columnFamily : entitySchema.getRequiredColumnFamilies()) {
          desc.addFamily(new HColumnDescriptor(columnFamily));
        }
        hbaseAdmin.createTable(desc);
      } else {
        Set<String> familiesToAdd = entitySchema.getRequiredColumnFamilies();
        familiesToAdd.add(new String(Constants.SYS_COL_FAMILY));
        familiesToAdd.add(new String(Constants.OBSERVABLE_COL_FAMILY));
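        // Determine which required families the existing table is missing:
        // familiesToAdd starts with everything the entity schema needs (plus
        // the system and observable families), and families that are already
        // present on the table are removed from the set below.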
        HTableDescriptor desc = hbaseAdmin.getTableDescriptor(tableName
            .getBytes());
        for (HColumnDescriptor columnDesc : desc.getColumnFamilies()) {
          String familyName = columnDesc.getNameAsString();
          if (familiesToAdd.contains(familyName)) {
            familiesToAdd.remove(familyName);
          }
        }
        if (familiesToAdd.size() > 0) {
          // Disable the table while adding the new column families, then
          // re-enable it.
          hbaseAdmin.disableTable(tableName);
          try {
            for (String family : familiesToAdd) {
              hbaseAdmin.addColumn(tableName, new HColumnDescriptor(family));
            }
          } finally {
            hbaseAdmin.enableTable(tableName);
          }
        }
      }
    } catch (IOException e) {
      throw new DatasetException(e);
    }

    return withPartitionStrategy(descriptor);
  }

  @Override
  public DatasetDescriptor update(String name, DatasetDescriptor descriptor) {
    String tableName = getTableName(name);
    String entityName = getEntityName(name);
    schemaManager.refreshManagedSchemaCache(tableName, entityName);

    String schemaString = descriptor.getSchema().toString();
    AvroKeyEntitySchemaParser parser = new AvroKeyEntitySchemaParser();
    EntitySchema entitySchema = parser.parseEntitySchema(schemaString);

    // A version of -1 means this schema is not yet a registered version, so
    // migrate the managed schema to it; otherwise nothing has changed.
    if (schemaManager.getEntityVersion(tableName, entityName, entitySchema) == -1) {
      schemaManager.migrateSchema(tableName, entityName, schemaString);
    } else {
      logger.info("Schema hasn't changed, not migrating: (" + name + ")");
    }
    return withPartitionStrategy(descriptor);
  }

  @SuppressWarnings("deprecation")
  @Override
  public DatasetDescriptor load(String name) {
    if (!exists(name)) {
      throw new com.cloudera.cdk.data.NoSuchDatasetException(
          "No such dataset: " + name);
    }
    String tableName = getTableName(name);
    String entityName = getEntityName(name);
    return getDatasetDescriptor(schemaManager.getEntitySchema(tableName,
        entityName).getRawSchema());
  }

  @Override
  public boolean delete(String name) {
    DatasetDescriptor descriptor = load(name);
    String tableName = getTableName(name);
    String entityName = getEntityName(name);
    schemaManager.deleteSchema(tableName, entityName);

    String entitySchemaString = descriptor.getSchema().toString(true);
    AvroKeyEntitySchemaParser parser = new AvroKeyEntitySchemaParser();
    AvroEntitySchema entitySchema = parser.parseEntitySchema(entitySchemaString);

    // TODO: this may delete columns for other entities if they share column families
    // TODO: https://issues.cloudera.org/browse/CDK-145, https://issues.cloudera.org/browse/CDK-146
    for (String columnFamily : entitySchema.getRequiredColumnFamilies()) {
      try {
        hbaseAdmin.disableTable(tableName);
        try {
          hbaseAdmin.deleteColumn(tableName, columnFamily);
        } finally {
          hbaseAdmin.enableTable(tableName);
        }
      } catch (IOException e) {
        throw new MetadataProviderException(e);
      }
    }
    return true;
  }

  @Override
  public boolean exists(String name) {
    String tableName = getTableName(name);
    String entityName = getEntityName(name);
    schemaManager.refreshManagedSchemaCache(tableName, entityName);
    return schemaManager.hasManagedSchema(tableName, entityName);
  }

  public Collection<String> list() {
    throw new UnsupportedOperationException();
  }

  static String getTableName(String name) {
    // TODO: change to use namespace (CDK-140)
    if (name.contains(".")) {
      return name.substring(0, name.indexOf('.'));
    }
    return name;
  }

  static String getEntityName(String name) {
    return name.substring(name.indexOf('.') + 1);
  }

  private static DatasetDescriptor getDatasetDescriptor(String schemaString) {
    AvroKeyEntitySchemaParser parser = new AvroKeyEntitySchemaParser();
    PartitionStrategy partitionStrategy = parser.parseKeySchema(schemaString)
        .getPartitionStrategy();
    return new DatasetDescriptor.Builder()
        .schemaLiteral(schemaString)
        .partitionStrategy(partitionStrategy)
        .build();
  }
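  // Builds a descriptor from the given descriptor's schema and location, with
  // the partition strategy parsed from the key portion of that schema.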
  // TODO: move the logic of parsing keys to DatasetDescriptor itself
  private static DatasetDescriptor withPartitionStrategy(
      DatasetDescriptor descriptor) {
    AvroKeyEntitySchemaParser parser = new AvroKeyEntitySchemaParser();
    PartitionStrategy partitionStrategy = parser
        .parseKeySchema(descriptor.getSchema().toString())
        .getPartitionStrategy();
    return new DatasetDescriptor.Builder()
        .schema(descriptor.getSchema())
        .partitionStrategy(partitionStrategy)
        .location(descriptor.getLocation())
        .build();
  }
}