/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kylin.source.hive;
import static com.google.common.base.Preconditions.checkNotNull;
import static java.lang.String.format;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.annotation.Nullable;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.metadata.MetadataManager;
import org.apache.kylin.metadata.datatype.DataType;
import org.apache.kylin.metadata.model.ColumnDesc;
import org.apache.kylin.metadata.model.DataModelDesc;
import org.apache.kylin.metadata.model.TableDesc;
import org.apache.kylin.metadata.model.TblColRef;
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
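/**
 * Validates that (re)loading a hive table will not break existing cubes: a load is allowed
 * when the table has never been loaded before, or when every column used by related cubes
 * is still present in hive with a compatible type (and, for lookup tables, at the same position).
 *
 * A minimal usage sketch; how the hive client, the managers and the config are obtained
 * here is illustrative, not part of this class:
 * <pre>
 * SchemaChecker checker = new SchemaChecker(hiveClient,
 *         MetadataManager.getInstance(kylinConfig), CubeManager.getInstance(kylinConfig));
 * checker.allowReload("default", "kylin_sales").raiseExceptionWhenInvalid();
 * </pre>
 */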
public class SchemaChecker {
private final IHiveClient hiveClient;
private final MetadataManager metadataManager;
private final CubeManager cubeManager;
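/**
 * Outcome of a schema check, carrying a human-readable reason for both the valid and the invalid case.
 */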
static class CheckResult {
private final boolean valid;
private final String reason;
private CheckResult(boolean valid, String reason) {
this.valid = valid;
this.reason = reason;
}
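/** Throws a RuntimeException carrying the reason if the check found the schema change invalid. */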
void raiseExceptionWhenInvalid() {
if (!valid) {
throw new RuntimeException(reason);
}
}
static CheckResult validOnFirstLoad(String tableName) {
return new CheckResult(true, format("Table '%s' hasn't been loaded before", tableName));
}
static CheckResult validOnCompatibleSchema(String tableName) {
return new CheckResult(true, format("Table '%s' is compatible with all existing cubes", tableName));
}
static CheckResult invalidOnFetchSchema(String tableName, Exception e) {
return new CheckResult(false, format("Failed to fetch metadata of '%s': %s", tableName, e.getMessage()));
}
static CheckResult invalidOnIncompatibleSchema(String tableName, List<String> reasons) {
StringBuilder buf = new StringBuilder();
for (String reason : reasons) {
buf.append("- ").append(reason).append("\n");
}
return new CheckResult(false, format("Found %d issue(s) with '%s':%n%s Please disable and purge related cube(s) first", reasons.size(), tableName, buf.toString()));
}
}
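/** All three collaborators are required; checkNotNull raises a NullPointerException otherwise. */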
SchemaChecker(IHiveClient hiveClient, MetadataManager metadataManager, CubeManager cubeManager) {
this.hiveClient = checkNotNull(hiveClient, "hiveClient is null");
this.metadataManager = checkNotNull(metadataManager, "metadataManager is null");
this.cubeManager = checkNotNull(cubeManager, "cubeManager is null");
}
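/** Fetches the current column metadata of the given hive table through the hive client. */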
private List<HiveTableMeta.HiveTableColumnMeta> fetchSchema(String dbName, String tblName) throws Exception {
List<HiveTableMeta.HiveTableColumnMeta> columnMetas = Lists.newArrayList();
columnMetas.addAll(hiveClient.getHiveTableMeta(dbName, tblName).allColumns);
return columnMetas;
}
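/** Returns all cubes whose model references the given table, skipping cubes that allow a broken descriptor or have no model. */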
private List<CubeInstance> findCubeByTable(final String fullTableName) {
Iterable<CubeInstance> relatedCubes = Iterables.filter(cubeManager.listAllCubes(), new Predicate<CubeInstance>() {
@Override
public boolean apply(@Nullable CubeInstance cube) {
if (cube == null || cube.allowBrokenDescriptor()) {
return false;
}
DataModelDesc model = cube.getModel();
if (model == null) {
return false;
}
return model.containsTable(fullTableName);
}
});
return ImmutableList.copyOf(relatedCubes);
}
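/**
 * Checks whether a kylin column is still compatible with the corresponding hive column:
 * names must match case-insensitively and the base type names must match, with the
 * relaxations described in the inline comments (float vs. double, integer widening).
 */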
private boolean isColumnCompatible(ColumnDesc column, HiveTableMeta.HiveTableColumnMeta field) {
if (!column.getName().equalsIgnoreCase(field.name)) {
return false;
}
String typeStr = field.dataType;
// kylin uses double internally for float, see HiveSourceTableLoader.java
// TODO: should this normalization be in the DataType class?
if ("float".equalsIgnoreCase(typeStr)) {
typeStr = "double";
}
DataType fieldType = DataType.getType(typeStr);
if (column.getType().isIntegerFamily()) {
// OLAPTable.listSourceColumns converts some integer columns to bigint,
// therefore strict type comparison won't work.
// changing from one integer type to another should be fine.
return fieldType.isIntegerFamily();
} else {
// only compare base type name, changing precision or scale should be fine
return column.getTypeName().equals(fieldType.getName());
}
}
/**
 * Checks whether all columns used in `cube` have a compatible schema in the current hive schema denoted by `fieldsMap`.
 * @param cube cube to check, must use `table` in its model
 * @param table kylin's table metadata
 * @param fieldsMap current hive schema of `table`, keyed by upper-case column name
 * @return names of the columns used in `cube` that are missing from or incompatible with `fieldsMap`; empty if all columns are compatible
 */
private List<String> checkAllColumnsInCube(CubeInstance cube, TableDesc table, Map<String, HiveTableMeta.HiveTableColumnMeta> fieldsMap) {
Set<ColumnDesc> usedColumns = Sets.newHashSet();
for (TblColRef col : cube.getAllColumns()) {
usedColumns.add(col.getColumnDesc());
}
List<String> violateColumns = Lists.newArrayList();
for (ColumnDesc column : table.getColumns()) {
if (!column.isComputedColumnn() && usedColumns.contains(column)) {
HiveTableMeta.HiveTableColumnMeta field = fieldsMap.get(column.getName());
if (field == null || !isColumnCompatible(column, field)) {
violateColumns.add(column.getName());
}
}
}
return violateColumns;
}
/**
 * Checks whether all columns in `table` are still present in `fields`, at the same index and with a compatible type.
 *
 * @param table kylin's table metadata
 * @param fields current table metadata in hive
 * @return true if only new columns were appended in hive, false otherwise
 */
private boolean checkAllColumnsInTableDesc(TableDesc table, List<HiveTableMeta.HiveTableColumnMeta> fields) {
if (table.getColumnCount() > fields.size()) {
return false;
}
ColumnDesc[] columns = table.getColumns();
for (int i = 0; i < columns.length; i++) {
if (!isColumnCompatible(columns[i], fields.get(i))) {
return false;
}
}
return true;
}
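/**
 * Decides whether the hive table `dbName.tblName` can be (re)loaded without breaking existing cubes.
 * A never-loaded table is always allowed; on a reload, every related cube is checked: columns used
 * from a fact table must keep a compatible schema, and a lookup table may only have new columns appended.
 *
 * @return a CheckResult describing whether the (re)load is allowed and why
 */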
public CheckResult allowReload(String dbName, String tblName) {
final String fullTableName = (dbName + "." + tblName).toUpperCase();
TableDesc existing = metadataManager.getTableDesc(fullTableName);
if (existing == null) {
return CheckResult.validOnFirstLoad(fullTableName);
}
List<HiveTableMeta.HiveTableColumnMeta> currentFields;
Map<String, HiveTableMeta.HiveTableColumnMeta> currentFieldsMap = Maps.newHashMap();
try {
currentFields = fetchSchema(dbName, tblName);
} catch (Exception e) {
return CheckResult.invalidOnFetchSchema(fullTableName, e);
}
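// index the current hive columns by upper-case name so they can be looked up by kylin's column names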
for (HiveTableMeta.HiveTableColumnMeta field : currentFields) {
currentFieldsMap.put(field.name.toUpperCase(), field);
}
List<String> issues = Lists.newArrayList();
for (CubeInstance cube : findCubeByTable(fullTableName)) {
String modelName = cube.getModel().getName();
// if the user reloads a fact table used by a cube, all columns used by the cube must match the current schema
if (cube.getModel().isFactTable(fullTableName)) {
TableDesc factTable = cube.getModel().findFirstTable(fullTableName).getTableDesc();
List<String> violateColumns = checkAllColumnsInCube(cube, factTable, currentFieldsMap);
if (!violateColumns.isEmpty()) {
issues.add(format("Column %s used in cube[%s] and model[%s], but changed in hive", violateColumns, cube.getName(), modelName));
}
}
// if the user reloads a lookup table used by a cube, only appending new column(s) is allowed; all existing columns
// must stay the same (compatible type changes excepted)
if (cube.getModel().isLookupTable(fullTableName)) {
TableDesc lookupTable = cube.getModel().findFirstTable(fullTableName).getTableDesc();
if (!checkAllColumnsInTableDesc(lookupTable, currentFields)) {
issues.add(format("Table '%s' is used as Lookup Table in cube[%s] and model[%s], but changed in hive", lookupTable.getIdentity(), cube.getName(), modelName));
}
}
}
if (issues.isEmpty()) {
return CheckResult.validOnCompatibleSchema(fullTableName);
}
return CheckResult.invalidOnIncompatibleSchema(fullTableName, issues);
}
}