/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kylin.rest.service;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import org.apache.commons.lang.StringUtils;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.HadoopUtil;
import org.apache.kylin.engine.mr.common.HadoopShellExecutable;
import org.apache.kylin.engine.mr.common.MapReduceExecutable;
import org.apache.kylin.job.execution.DefaultChainedExecutable;
import org.apache.kylin.job.execution.ExecutableManager;
import org.apache.kylin.job.execution.ExecutableState;
import org.apache.kylin.metadata.MetadataManager;
import org.apache.kylin.metadata.model.ColumnDesc;
import org.apache.kylin.metadata.model.TableDesc;
import org.apache.kylin.metadata.model.TableExtDesc;
import org.apache.kylin.metadata.streaming.StreamingConfig;
import org.apache.kylin.rest.constant.Constant;
import org.apache.kylin.rest.exception.InternalErrorException;
import org.apache.kylin.rest.response.TableDescResponse;
import org.apache.kylin.source.hive.HiveClientFactory;
import org.apache.kylin.source.hive.HiveSourceTableLoader;
import org.apache.kylin.source.hive.IHiveClient;
import org.apache.kylin.source.hive.cardinality.HiveColumnCardinalityJob;
import org.apache.kylin.source.hive.cardinality.HiveColumnCardinalityUpdateJob;
import org.apache.kylin.source.kafka.config.KafkaConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.security.access.prepost.PreAuthorize;
import org.springframework.stereotype.Component;
@Component("tableService")
public class TableService extends BasicService {
private static final Logger logger = LoggerFactory.getLogger(TableService.class);
@Autowired
private ModelService modelService;
@Autowired
private ProjectService projectService;
@Autowired
private StreamingService streamingService;
@Autowired
private KafkaConfigService kafkaConfigService;
public List<TableDesc> getTableDescByProject(String project, boolean withExt) throws IOException {
List<TableDesc> tables = getProjectManager().listDefinedTables(project);
if (null == tables) {
return Collections.emptyList();
}
if (withExt) {
tables = cloneTableDesc(tables);
}
return tables;
}
public TableDesc getTableDescByName(String tableName, boolean withExt) {
TableDesc table = getMetadataManager().getTableDesc(tableName);
if (withExt) {
table = cloneTableDesc(table);
}
return table;
}
@PreAuthorize(Constant.ACCESS_HAS_ROLE_ADMIN)
public String[] loadHiveTablesToProject(String[] tables, String project) throws IOException {
Set<String> loaded = HiveSourceTableLoader.loadHiveTables(tables, getConfig());
String[] result = loaded.toArray(new String[loaded.size()]);
syncTableToProject(result, project);
return result;
}
private void unLoadHiveTable(String tableName) throws IOException {
tableName = normalizeHiveTableName(tableName);
MetadataManager metaMgr = MetadataManager.getInstance(KylinConfig.getInstanceFromEnv());
metaMgr.removeSourceTable(tableName);
metaMgr.removeTableExt(tableName);
}
private void syncTableToProject(String[] tables, String project) throws IOException {
getProjectManager().addTableDescToProject(tables, project);
}
private void removeTableFromProject(String tableName, String projectName) throws IOException {
tableName = normalizeHiveTableName(tableName);
getProjectManager().removeTableDescFromProject(tableName, projectName);
}
/**
* A table may be referenced by several projects, and Kylin keeps only one copy of the
* metadata for each table; that is why the table is first removed from the project and
* only dropped from the metadata store once no project or model references it.
* @param tableName the table to unload
* @param project the project to unload the table from
* @return true if the table was removed from the project and/or the metadata store
*/
@PreAuthorize(Constant.ACCESS_HAS_ROLE_ADMIN)
public boolean unLoadHiveTable(String tableName, String project) {
boolean rtn = false;
int tableType = 0;
tableName = normalizeHiveTableName(tableName);
TableDesc desc = getMetadataManager().getTableDesc(tableName);
if (desc == null) {
return false;
}
tableType = desc.getSourceType();
try {
if (!modelService.isTableInModel(tableName, project)) {
removeTableFromProject(tableName, project);
rtn = true;
} else {
List<String> models = modelService.getModelsUsingTable(tableName, project);
throw new InternalErrorException("Table is already in use by models " + models);
}
} catch (IOException e) {
logger.error(e.getMessage(), e);
}
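// if no project and no model references the table any more, drop it from the metadata store entirely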
if (!projectService.isTableInAnyProject(tableName) && !modelService.isTableInAnyModel(tableName)) {
try {
unLoadHiveTable(tableName);
rtn = true;
} catch (IOException e) {
logger.error(e.getMessage(), e);
rtn = false;
}
}
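// a source type of 1 marks a streaming (Kafka) table; also drop its streaming and Kafka configs once nothing references it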
if (tableType == 1 && !projectService.isTableInAnyProject(tableName) && !modelService.isTableInAnyModel(tableName)) {
StreamingConfig config = null;
KafkaConfig kafkaConfig = null;
try {
config = streamingService.getStreamingManager().getStreamingConfig(tableName);
kafkaConfig = kafkaConfigService.getKafkaConfig(tableName);
streamingService.dropStreamingConfig(config);
kafkaConfigService.dropKafkaConfig(kafkaConfig);
rtn = true;
} catch (Exception e) {
rtn = false;
logger.error(e.getLocalizedMessage(), e);
}
}
return rtn;
}
/**
* Save a streaming table descriptor and link it to the given project.
*
* @param desc the streaming table descriptor to save
* @param project the project the table should be added to
* @throws IOException if the metadata store cannot be updated
*/
@PreAuthorize(Constant.ACCESS_HAS_ROLE_ADMIN)
public void addStreamingTable(TableDesc desc, String project) throws IOException {
desc.setUuid(UUID.randomUUID().toString());
getMetadataManager().saveSourceTable(desc);
syncTableToProject(new String[] { desc.getIdentity() }, project);
}
/**
* List all database names visible to the configured Hive client.
*
* @return the Hive database names
* @throws Exception if the Hive metadata cannot be retrieved
*/
public List<String> getHiveDbNames() throws Exception {
IHiveClient hiveClient = HiveClientFactory.getHiveClient();
List<String> results = hiveClient.getHiveDbNames();
return results;
}
/**
* List all table names in the given Hive database.
*
* @param database the Hive database to list tables from
* @return the table names in that database
* @throws Exception if the Hive metadata cannot be retrieved
*/
public List<String> getHiveTableNames(String database) throws Exception {
IHiveClient hiveClient = HiveClientFactory.getHiveClient();
List<String> results = hiveClient.getHiveTableNames(database);
return results;
}
private TableDescResponse cloneTableDesc(TableDesc table) {
TableExtDesc tableExtDesc = getMetadataManager().getTableExt(table.getIdentity());
// Clone TableDesc
TableDescResponse rtableDesc = new TableDescResponse(table);
Map<String, Long> cardinality = new HashMap<String, Long>();
Map<String, String> dataSourceProp = new HashMap<>();
String scard = tableExtDesc.getCardinality();
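// cardinality is persisted as a comma-separated string, one value per column in column order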
if (!StringUtils.isEmpty(scard)) {
String[] cards = StringUtils.split(scard, ",");
ColumnDesc[] cdescs = rtableDesc.getColumns();
for (int i = 0; i < cdescs.length; i++) {
ColumnDesc columnDesc = cdescs[i];
if (cards.length > i) {
cardinality.put(columnDesc.getName(), Long.parseLong(cards[i]));
} else {
logger.error("The result cardinality is not identical with hive table metadata, cardinality : " + scard + " column array length: " + cdescs.length);
break;
}
}
rtableDesc.setCardinality(cardinality);
}
dataSourceProp.putAll(tableExtDesc.getDataSourceProp());
rtableDesc.setDescExd(dataSourceProp);
return rtableDesc;
}
private List<TableDesc> cloneTableDesc(List<TableDesc> tables) throws IOException {
List<TableDesc> descs = new ArrayList<TableDesc>();
for (TableDesc table : tables) {
descs.add(cloneTableDesc(table));
}
return descs;
}
@PreAuthorize(Constant.ACCESS_HAS_ROLE_MODELER + " or " + Constant.ACCESS_HAS_ROLE_ADMIN)
public void calculateCardinalityIfNotPresent(String[] tables, String submitter) throws IOException {
MetadataManager metaMgr = getMetadataManager();
ExecutableManager exeMgt = ExecutableManager.getInstance(getConfig());
for (String table : tables) {
TableExtDesc tableExtDesc = metaMgr.getTableExt(table);
String jobID = tableExtDesc.getJodID();
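// only trigger a new cardinality job if none was recorded or the previous one is no longer running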
if (null == jobID || ExecutableState.RUNNING != exeMgt.getOutput(jobID).getState()) {
calculateCardinality(table, submitter);
}
}
}
/**
* Generate cardinality statistics for a table. This triggers a Hadoop job; the
* result is merged into the table's extended (ext) metadata.
*
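* A hypothetical invocation (the table name and submitter are illustrative only):
* <pre>
*     tableService.calculateCardinality("DEFAULT.KYLIN_SALES", "ADMIN");
* </pre>
*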
* @param tableName name of the table, in DB.TABLE form
* @param submitter the user submitting the job
*/
@PreAuthorize(Constant.ACCESS_HAS_ROLE_MODELER + " or " + Constant.ACCESS_HAS_ROLE_ADMIN)
public void calculateCardinality(String tableName, String submitter) throws IOException {
tableName = normalizeHiveTableName(tableName);
TableDesc table = getMetadataManager().getTableDesc(tableName);
final TableExtDesc tableExt = getMetadataManager().getTableExt(tableName);
if (table == null) {
IllegalArgumentException e = new IllegalArgumentException("Cannot find table descriptor " + tableName);
logger.error("Cannot find table descriptor " + tableName, e);
throw e;
}
DefaultChainedExecutable job = new DefaultChainedExecutable();
// make sure the job can be scheduled when the DistributedScheduler is enabled
job.setParam("segmentId", tableName);
job.setName("Hive Column Cardinality calculation for table '" + tableName + "'");
job.setSubmitter(submitter);
String outPath = getConfig().getHdfsWorkingDirectory() + "cardinality/" + job.getId() + "/" + tableName;
String param = "-table " + tableName + " -output " + outPath;
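// step 1: MapReduce job scans the Hive table and computes per-column cardinality, writing the result to outPath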
MapReduceExecutable step1 = new MapReduceExecutable();
step1.setMapReduceJobClass(HiveColumnCardinalityJob.class);
step1.setMapReduceParams(param);
step1.setParam("segmentId", tableName);
job.addTask(step1);
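// step 2: Hadoop shell step reads the MapReduce output and merges the cardinality values into the table ext metadata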
HadoopShellExecutable step2 = new HadoopShellExecutable();
step2.setJobClass(HiveColumnCardinalityUpdateJob.class);
step2.setJobParams(param);
step2.setParam("segmentId", tableName);
job.addTask(step2);
tableExt.setJodID(job.getId());
getMetadataManager().saveTableExt(tableExt);
getExecutableManager().addJob(job);
}
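/**
* Normalize a Hive table name to the upper-case DB.TABLE form used as the metadata key.
*/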
public String normalizeHiveTableName(String tableName) {
String[] dbTableName = HadoopUtil.parseHiveTableName(tableName);
return (dbTableName[0] + "." + dbTableName[1]).toUpperCase();
}
}