/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.kylin.rest.service;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;

import org.apache.commons.lang.StringUtils;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.HadoopUtil;
import org.apache.kylin.engine.mr.common.HadoopShellExecutable;
import org.apache.kylin.engine.mr.common.MapReduceExecutable;
import org.apache.kylin.job.execution.DefaultChainedExecutable;
import org.apache.kylin.job.execution.ExecutableManager;
import org.apache.kylin.job.execution.ExecutableState;
import org.apache.kylin.metadata.MetadataManager;
import org.apache.kylin.metadata.model.ColumnDesc;
import org.apache.kylin.metadata.model.TableDesc;
import org.apache.kylin.metadata.model.TableExtDesc;
import org.apache.kylin.metadata.streaming.StreamingConfig;
import org.apache.kylin.rest.constant.Constant;
import org.apache.kylin.rest.exception.InternalErrorException;
import org.apache.kylin.rest.response.TableDescResponse;
import org.apache.kylin.source.hive.HiveClientFactory;
import org.apache.kylin.source.hive.HiveSourceTableLoader;
import org.apache.kylin.source.hive.IHiveClient;
import org.apache.kylin.source.hive.cardinality.HiveColumnCardinalityJob;
import org.apache.kylin.source.hive.cardinality.HiveColumnCardinalityUpdateJob;
import org.apache.kylin.source.kafka.config.KafkaConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.security.access.prepost.PreAuthorize;
import org.springframework.stereotype.Component;

@Component("tableService")
public class TableService extends BasicService {

    private static final Logger logger = LoggerFactory.getLogger(TableService.class);

    @Autowired
    private ModelService modelService;

    @Autowired
    private ProjectService projectService;

    @Autowired
    private StreamingService streamingService;

    @Autowired
    private KafkaConfigService kafkaConfigService;

    public List<TableDesc> getTableDescByProject(String project, boolean withExt) throws IOException {
        List<TableDesc> tables = getProjectManager().listDefinedTables(project);
        if (null == tables) {
            return Collections.emptyList();
        }
        if (withExt) {
            tables = cloneTableDesc(tables);
        }
        return tables;
    }

    public TableDesc getTableDescByName(String tableName, boolean withExt) {
        TableDesc table = getMetadataManager().getTableDesc(tableName);
        if (withExt) {
            table = cloneTableDesc(table);
        }
        return table;
    }

    @PreAuthorize(Constant.ACCESS_HAS_ROLE_ADMIN)
    public String[] loadHiveTablesToProject(String[] tables, String project) throws IOException {
        Set<String> loaded = HiveSourceTableLoader.loadHiveTables(tables, getConfig());
        String[] result = (String[]) loaded.toArray(new String[loaded.size()]);
        syncTableToProject(result, project);
        return result;
    }

    private void unLoadHiveTable(String tableName) throws IOException {
        tableName = normalizeHiveTableName(tableName);
        MetadataManager metaMgr = MetadataManager.getInstance(KylinConfig.getInstanceFromEnv());
        metaMgr.removeSourceTable(tableName);
        metaMgr.removeTableExt(tableName);
    }

    private void syncTableToProject(String[] tables, String project) throws IOException {
        getProjectManager().addTableDescToProject(tables, project);
    }

    private void removeTableFromProject(String tableName, String projectName) throws IOException {
        tableName = normalizeHiveTableName(tableName);
        getProjectManager().removeTableDescFromProject(tableName, projectName);
    }

    /**
     * A table may be referenced by several projects, and Kylin keeps only one copy of metadata
     * for each table; that is why there are two separate checks below.
     *
     * @param tableName
     * @param project
     * @return true if the table was detached from the project (and, once unreferenced, unloaded)
     */
    @PreAuthorize(Constant.ACCESS_HAS_ROLE_ADMIN)
    public boolean unLoadHiveTable(String tableName, String project) {
        boolean rtn = false;
        int tableType = 0;

        tableName = normalizeHiveTableName(tableName);
        TableDesc desc = getMetadataManager().getTableDesc(tableName);
        if (desc == null)
            return false;
        tableType = desc.getSourceType();

        // Detach the table from the given project unless a model in that project still uses it.
        try {
            if (!modelService.isTableInModel(tableName, project)) {
                removeTableFromProject(tableName, project);
                rtn = true;
            } else {
                List<String> models = modelService.getModelsUsingTable(tableName, project);
                throw new InternalErrorException("Table is already in use by models " + models);
            }
        } catch (IOException e) {
            logger.error(e.getMessage(), e);
        }

        // Drop the table metadata once no project and no model references it.
        if (!projectService.isTableInAnyProject(tableName) && !modelService.isTableInAnyModel(tableName)) {
            try {
                unLoadHiveTable(tableName);
                rtn = true;
            } catch (IOException e) {
                logger.error(e.getMessage(), e);
                rtn = false;
            }
        }

        // Remove streaming info: for streaming tables, also drop the streaming and Kafka configs.
        if (tableType == 1 && !projectService.isTableInAnyProject(tableName) && !modelService.isTableInAnyModel(tableName)) {
            StreamingConfig config = null;
            KafkaConfig kafkaConfig = null;
            try {
                config = streamingService.getStreamingManager().getStreamingConfig(tableName);
                kafkaConfig = kafkaConfigService.getKafkaConfig(tableName);
                streamingService.dropStreamingConfig(config);
                kafkaConfigService.dropKafkaConfig(kafkaConfig);
                rtn = true;
            } catch (Exception e) {
                rtn = false;
                logger.error(e.getLocalizedMessage(), e);
            }
        }
        return rtn;
    }

    /**
     * Save a streaming table descriptor and register it with the given project.
     *
     * @param desc
     * @param project
     * @throws IOException
     */
    @PreAuthorize(Constant.ACCESS_HAS_ROLE_ADMIN)
    public void addStreamingTable(TableDesc desc, String project) throws IOException {
        desc.setUuid(UUID.randomUUID().toString());
        getMetadataManager().saveSourceTable(desc);
        syncTableToProject(new String[] { desc.getIdentity() }, project);
    }

    /**
     * @return all database names visible to the configured Hive client
     * @throws Exception
     */
    public List<String> getHiveDbNames() throws Exception {
        IHiveClient hiveClient = HiveClientFactory.getHiveClient();
        List<String> results = hiveClient.getHiveDbNames();
        return results;
    }

    /**
     * @param database
     * @return all table names in the given Hive database
     * @throws Exception
     */
    public List<String> getHiveTableNames(String database) throws Exception {
        IHiveClient hiveClient = HiveClientFactory.getHiveClient();
        List<String> results = hiveClient.getHiveTableNames(database);
        return results;
    }

    private TableDescResponse cloneTableDesc(TableDesc table) {
        TableExtDesc tableExtDesc = getMetadataManager().getTableExt(table.getIdentity());

        // Clone TableDesc
        TableDescResponse rtableDesc = new TableDescResponse(table);
        Map<String, Long> cardinality = new HashMap<String, Long>();
        Map<String, String> dataSourceProp = new HashMap<>();
        String scard = tableExtDesc.getCardinality();
        if (!StringUtils.isEmpty(scard)) {
            String[] cards = StringUtils.split(scard, ",");
            ColumnDesc[] cdescs = rtableDesc.getColumns();
            for (int i = 0; i < cdescs.length; i++) {
                ColumnDesc columnDesc = cdescs[i];
                if (cards.length > i) {
                    cardinality.put(columnDesc.getName(), Long.parseLong(cards[i]));
                } else {
                    logger.error("The cardinality result does not match the Hive table metadata, cardinality: " + scard + " column array length: " + cdescs.length);
                    break;
                }
            }
            rtableDesc.setCardinality(cardinality);
        }
        dataSourceProp.putAll(tableExtDesc.getDataSourceProp());
        rtableDesc.setDescExd(dataSourceProp);
        return rtableDesc;
    }

    private List<TableDesc> cloneTableDesc(List<TableDesc> tables) throws IOException {
        List<TableDesc> descs = new ArrayList<TableDesc>();
        Iterator<TableDesc> it = tables.iterator();
        while (it.hasNext()) {
            TableDesc table = it.next();
            TableDescResponse rtableDesc = cloneTableDesc(table);
            descs.add(rtableDesc);
        }
        return descs;
    }

    @PreAuthorize(Constant.ACCESS_HAS_ROLE_MODELER + " or " + Constant.ACCESS_HAS_ROLE_ADMIN)
    public void calculateCardinalityIfNotPresent(String[] tables, String submitter) throws IOException {
        MetadataManager metaMgr = getMetadataManager();
        ExecutableManager exeMgt = ExecutableManager.getInstance(getConfig());
        for (String table : tables) {
            TableExtDesc tableExtDesc = metaMgr.getTableExt(table);
            String jobID = tableExtDesc.getJodID();
            // Skip tables whose cardinality job is already running.
            if (null == jobID || ExecutableState.RUNNING != exeMgt.getOutput(jobID).getState()) {
                calculateCardinality(table, submitter);
            }
        }
    }

    /**
     * Generate cardinality for a table. This triggers a Hadoop job, and the result
     * is merged into the table's ext info.
     *
     * @param tableName
     */
    @PreAuthorize(Constant.ACCESS_HAS_ROLE_MODELER + " or " + Constant.ACCESS_HAS_ROLE_ADMIN)
    public void calculateCardinality(String tableName, String submitter) throws IOException {
        tableName = normalizeHiveTableName(tableName);
        TableDesc table = getMetadataManager().getTableDesc(tableName);
        final TableExtDesc tableExt = getMetadataManager().getTableExt(tableName);
        if (table == null) {
            IllegalArgumentException e = new IllegalArgumentException("Cannot find table descriptor " + tableName);
            logger.error("Cannot find table descriptor " + tableName, e);
            throw e;
        }

        DefaultChainedExecutable job = new DefaultChainedExecutable();
        // Make sure the job can be scheduled when the DistributedScheduler is enabled.
        job.setParam("segmentId", tableName);
        job.setName("Hive Column Cardinality calculation for table '" + tableName + "'");
        job.setSubmitter(submitter);

        String outPath = getConfig().getHdfsWorkingDirectory() + "cardinality/" + job.getId() + "/" + tableName;
        String param = "-table " + tableName + " -output " + outPath;

        // Step 1: a MapReduce job that computes the column cardinality.
        MapReduceExecutable step1 = new MapReduceExecutable();
        step1.setMapReduceJobClass(HiveColumnCardinalityJob.class);
        step1.setMapReduceParams(param);
        step1.setParam("segmentId", tableName);
        job.addTask(step1);

        // Step 2: a shell step that writes the result back into the table ext metadata.
        HadoopShellExecutable step2 = new HadoopShellExecutable();
        step2.setJobClass(HiveColumnCardinalityUpdateJob.class);
        step2.setJobParams(param);
        step2.setParam("segmentId", tableName);
        job.addTask(step2);

        tableExt.setJodID(job.getId());
        getMetadataManager().saveTableExt(tableExt);
        getExecutableManager().addJob(job);
    }

    public String normalizeHiveTableName(String tableName) {
        String[] dbTableName = HadoopUtil.parseHiveTableName(tableName);
        return (dbTableName[0] + "." + dbTableName[1]).toUpperCase();
    }
}