/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kylin.engine.mr;
import java.io.IOException;
import java.util.List;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.engine.mr.common.BatchConstants;
import org.apache.kylin.engine.mr.common.HadoopShellExecutable;
import org.apache.kylin.engine.mr.common.MapReduceExecutable;
import org.apache.kylin.engine.mr.steps.CreateDictionaryJob;
import org.apache.kylin.engine.mr.steps.CubingExecutableUtil;
import org.apache.kylin.engine.mr.steps.FactDistinctColumnsJob;
import org.apache.kylin.engine.mr.steps.MergeDictionaryStep;
import org.apache.kylin.engine.mr.steps.UpdateCubeInfoAfterBuildStep;
import org.apache.kylin.engine.mr.steps.UpdateCubeInfoAfterMergeStep;
import org.apache.kylin.job.constant.ExecutableConstants;
import org.apache.kylin.job.engine.JobEngineConfig;
import com.google.common.base.Preconditions;
/**
* Holds reusable steps for builders.
*/
public class JobBuilderSupport {

    /** Folder-name prefix used by {@link #getCuboidOutputPathsByLevel(String, int)}. */
    public static final String LayeredCuboidFolderPrefix = "level_";

    /** Job engine configuration built from the segment's own config. */
    protected final JobEngineConfig config;
    /** The segment this support object creates steps and paths for. */
    protected final CubeSegment seg;
    /** Submitter passed in at construction time; stored as given (not validated here). */
    protected final String submitter;

    /**
     * Creates a support object bound to one segment.
     *
     * @param seg       the segment to work on; must not be null
     * @param submitter stored as-is in {@link #submitter}
     */
    public JobBuilderSupport(CubeSegment seg, String submitter) {
        Preconditions.checkNotNull(seg, "segment cannot be null");
        this.seg = seg;
        this.submitter = submitter;
        this.config = new JobEngineConfig(seg.getConfig());
    }

    /**
     * Creates the fact-distinct-columns step with the stats-enabled argument set to {@code false}.
     *
     * @param jobId the cubing job id used for output paths and the job-id argument
     * @return the configured step
     */
    public MapReduceExecutable createFactDistinctColumnsStep(String jobId) {
        return createFactDistinctColumnsStep(jobId, false);
    }

    /**
     * Creates the fact-distinct-columns step with the stats-enabled argument set to {@code true}.
     *
     * @param jobId the cubing job id used for output paths and the job-id argument
     * @return the configured step
     */
    public MapReduceExecutable createFactDistinctColumnsStepWithStats(String jobId) {
        return createFactDistinctColumnsStep(jobId, true);
    }

    /**
     * Builds a {@link MapReduceExecutable} that runs {@link FactDistinctColumnsJob}.
     * The statistics output path and sampling percent arguments are always appended;
     * {@code withStats} only controls the value of the stats-enabled argument.
     */
    private MapReduceExecutable createFactDistinctColumnsStep(String jobId, boolean withStats) {
        final MapReduceExecutable step = new MapReduceExecutable();
        step.setName(ExecutableConstants.STEP_NAME_FACT_DISTINCT_COLUMNS);
        step.setMapReduceJobClass(FactDistinctColumnsJob.class);

        // Argument order is kept exactly as-is: it determines the resulting command string.
        final StringBuilder args = new StringBuilder();
        appendMapReduceParameters(args);
        appendExecCmdParameters(args, BatchConstants.ARG_CUBE_NAME, realizationName());
        appendExecCmdParameters(args, BatchConstants.ARG_OUTPUT, getFactDistinctColumnsPath(jobId));
        appendExecCmdParameters(args, BatchConstants.ARG_SEGMENT_ID, seg.getUuid());
        appendExecCmdParameters(args, BatchConstants.ARG_STATS_ENABLED, String.valueOf(withStats));
        appendExecCmdParameters(args, BatchConstants.ARG_STATS_OUTPUT, getStatisticsPath(jobId));
        final String samplingPercent = String.valueOf(config.getConfig().getCubingInMemSamplingPercent());
        appendExecCmdParameters(args, BatchConstants.ARG_STATS_SAMPLING_PERCENT, samplingPercent);
        final String jobName = "Kylin_Fact_Distinct_Columns_" + realizationName() + "_Step";
        appendExecCmdParameters(args, BatchConstants.ARG_JOB_NAME, jobName);
        appendExecCmdParameters(args, BatchConstants.ARG_CUBING_JOB_ID, jobId);
        step.setMapReduceParams(args.toString());

        step.setCounterSaveAs(CubingJob.SOURCE_RECORD_COUNT + "," + CubingJob.SOURCE_SIZE_BYTES);
        return step;
    }

    /**
     * Creates the build-dictionary step, a {@link HadoopShellExecutable} running
     * {@link CreateDictionaryJob} with the fact-distinct-columns path of the job as its input.
     *
     * @param jobId the cubing job id used to locate the fact-distinct-columns output
     * @return the configured step
     */
    public HadoopShellExecutable createBuildDictionaryStep(String jobId) {
        final HadoopShellExecutable step = new HadoopShellExecutable();
        step.setName(ExecutableConstants.STEP_NAME_BUILD_DICTIONARY);

        final StringBuilder args = new StringBuilder();
        appendExecCmdParameters(args, BatchConstants.ARG_CUBE_NAME, realizationName());
        appendExecCmdParameters(args, BatchConstants.ARG_SEGMENT_ID, seg.getUuid());
        appendExecCmdParameters(args, BatchConstants.ARG_INPUT, getFactDistinctColumnsPath(jobId));
        step.setJobParams(args.toString());

        step.setJobClass(CreateDictionaryJob.class);
        return step;
    }

    /**
     * Creates the update-cube-info step used after a build. Its params receive the
     * fact-distinct-columns path (under {@code BatchConstants.CFG_OUTPUT_PATH}), the cube name,
     * the segment id and the cubing job id.
     *
     * @param jobId the cubing job id
     * @return the configured step
     */
    public UpdateCubeInfoAfterBuildStep createUpdateCubeInfoAfterBuildStep(String jobId) {
        final UpdateCubeInfoAfterBuildStep step = new UpdateCubeInfoAfterBuildStep();
        step.setName(ExecutableConstants.STEP_NAME_UPDATE_CUBE_INFO);

        step.getParams().put(BatchConstants.CFG_OUTPUT_PATH, getFactDistinctColumnsPath(jobId));
        CubingExecutableUtil.setCubeName(realizationName(), step.getParams());
        CubingExecutableUtil.setSegmentId(seg.getUuid(), step.getParams());
        CubingExecutableUtil.setCubingJobId(jobId, step.getParams());
        return step;
    }

    /**
     * Creates the merge-dictionary step, parameterized with the cube name, this segment's id
     * and the ids of the segments being merged.
     *
     * @param mergingSegmentIds ids of the segments being merged
     * @return the configured step
     */
    public MergeDictionaryStep createMergeDictionaryStep(List<String> mergingSegmentIds) {
        final MergeDictionaryStep step = new MergeDictionaryStep();
        step.setName(ExecutableConstants.STEP_NAME_MERGE_DICTIONARY);

        CubingExecutableUtil.setCubeName(realizationName(), step.getParams());
        CubingExecutableUtil.setSegmentId(seg.getUuid(), step.getParams());
        CubingExecutableUtil.setMergingSegmentIds(mergingSegmentIds, step.getParams());
        return step;
    }

    /**
     * Creates the update-cube-info step used after a merge, parameterized with the cube name,
     * this segment's id, the cubing job id and the ids of the segments being merged.
     *
     * @param mergingSegmentIds ids of the segments being merged
     * @param jobId             the cubing job id
     * @return the configured step
     */
    public UpdateCubeInfoAfterMergeStep createUpdateCubeInfoAfterMergeStep(List<String> mergingSegmentIds, String jobId) {
        final UpdateCubeInfoAfterMergeStep step = new UpdateCubeInfoAfterMergeStep();
        step.setName(ExecutableConstants.STEP_NAME_UPDATE_CUBE_INFO);

        CubingExecutableUtil.setCubeName(realizationName(), step.getParams());
        CubingExecutableUtil.setSegmentId(seg.getUuid(), step.getParams());
        CubingExecutableUtil.setCubingJobId(jobId, step.getParams());
        CubingExecutableUtil.setMergingSegmentIds(mergingSegmentIds, step.getParams());
        return step;
    }

    // ============================================================================

    /**
     * Returns the working directory of the given job, resolved against this object's config.
     *
     * @param jobId the job id
     * @return {@code <working dir>/kylin-<jobId>}
     */
    public String getJobWorkingDir(String jobId) {
        return getJobWorkingDir(this.config, jobId);
    }

    /**
     * Returns the job working directory followed by {@code /} and the realization name of
     * this object's segment.
     *
     * @param jobId the job id
     */
    public String getRealizationRootPath(String jobId) {
        return getJobWorkingDir(jobId) + "/" + realizationName();
    }

    /**
     * Returns the cuboid root path of the given job; the result ends with a slash.
     *
     * @param jobId the job id
     */
    public String getCuboidRootPath(String jobId) {
        return getRealizationRootPath(jobId) + "/cuboid/";
    }

    /**
     * Returns the cuboid root path for the last build job of the given segment.
     * Note that only the job id is taken from the {@code seg} argument (which shadows the
     * field of the same name); the realization name in the path still comes from the segment
     * this object was constructed with.
     *
     * @param seg the segment whose last build job id is used
     */
    public String getCuboidRootPath(CubeSegment seg) {
        final String lastBuildJobId = seg.getLastBuildJobID();
        return getCuboidRootPath(lastBuildJobId);
    }

    /**
     * Returns the secondary index path of the given job; the result ends with a slash.
     *
     * @param jobId the job id
     */
    public String getSecondaryIndexPath(String jobId) {
        return getRealizationRootPath(jobId) + "/secondary_index/";
    }

    /**
     * Appends the {@code -conf} argument for the default job configuration suffix
     * ({@code JobEngineConfig.DEFAUL_JOB_CONF_SUFFIX}).
     *
     * @param buf the command buffer to append to
     */
    public void appendMapReduceParameters(StringBuilder buf) {
        appendMapReduceParameters(buf, JobEngineConfig.DEFAUL_JOB_CONF_SUFFIX);
    }

    /**
     * Appends {@code " -conf <path>"} to the buffer, where the path is what
     * {@code config.getHadoopJobConfFilePath(jobType)} returns. Nothing is appended when that
     * path is null or empty.
     *
     * @param buf     the command buffer to append to
     * @param jobType passed through to {@code getHadoopJobConfFilePath}
     * @throws RuntimeException wrapping any {@link IOException} raised while resolving the path
     */
    public void appendMapReduceParameters(StringBuilder buf, String jobType) {
        final String jobConf;
        try {
            jobConf = config.getHadoopJobConfFilePath(jobType);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        if (jobConf == null || jobConf.isEmpty()) {
            return;
        }
        buf.append(" -conf ").append(jobConf);
    }

    /**
     * Returns the fact-distinct-columns output path of the given job (no trailing slash).
     *
     * @param jobId the job id
     */
    public String getFactDistinctColumnsPath(String jobId) {
        return getRealizationRootPath(jobId) + "/fact_distinct_columns";
    }

    /**
     * Returns the statistics path of the given job: the {@code fact_distinct_columns} folder
     * under the realization root, followed by {@code BatchConstants.CFG_OUTPUT_STATISTICS}.
     *
     * @param jobId the job id
     */
    public String getStatisticsPath(String jobId) {
        return getRealizationRootPath(jobId) + "/fact_distinct_columns/" + BatchConstants.CFG_OUTPUT_STATISTICS;
    }

    /** Name of the realization that this object's segment belongs to. */
    private String realizationName() {
        return seg.getRealization().getName();
    }

    // ============================================================================
    // static methods also shared by other job flow participant
    // ----------------------------------------------------------------------------

    /**
     * Returns the working directory of a job under {@code conf.getHdfsWorkingDirectory()}.
     *
     * @param conf  the job engine configuration supplying the base directory
     * @param jobId the job id
     */
    public static String getJobWorkingDir(JobEngineConfig conf, String jobId) {
        final String workingDir = conf.getHdfsWorkingDirectory();
        return getJobWorkingDir(workingDir, jobId);
    }

    /**
     * Returns {@code hdfsDir} (with a trailing slash added if it is missing) followed by
     * {@code "kylin-"} and the job id.
     *
     * @param hdfsDir the base directory, with or without a trailing slash
     * @param jobId   the job id
     */
    public static String getJobWorkingDir(String hdfsDir, String jobId) {
        final String base = hdfsDir.endsWith("/") ? hdfsDir : hdfsDir + "/";
        return base + "kylin-" + jobId;
    }

    /**
     * Appends {@code " -<paraName> <paraValue>"} to the buffer. The value is appended verbatim,
     * without any quoting or escaping.
     *
     * @param buf       the command buffer to append to
     * @param paraName  the parameter name, without the leading dash
     * @param paraValue the parameter value
     * @return the same buffer, to allow chaining
     */
    public static StringBuilder appendExecCmdParameters(StringBuilder buf, String paraName, String paraValue) {
        buf.append(" -").append(paraName);
        buf.append(" ").append(paraValue);
        return buf;
    }

    /**
     * Returns the cuboid output path for a level. Level 0 maps to
     * {@code level_base_cuboid}; any other level {@code n} maps to {@code level_n_cuboid}.
     * The folder name is concatenated directly onto {@code cuboidRootPath}; no separator is
     * inserted here.
     *
     * @param cuboidRootPath the cuboid root path to prepend
     * @param level          the cuboid level
     */
    public static String getCuboidOutputPathsByLevel(String cuboidRootPath, int level) {
        final String folderSuffix = (level == 0) ? "base_cuboid" : level + "_cuboid";
        return cuboidRootPath + LayeredCuboidFolderPrefix + folderSuffix;
    }
}