package com.alibaba.datax.plugin.rdbms.reader.util;
import com.alibaba.datax.common.constant.CommonConstant;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.rdbms.reader.Constant;
import com.alibaba.datax.plugin.rdbms.reader.Key;
import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
/**
 * Static helpers that expand one reader job configuration into per-task slice
 * configurations ({@link #doSplit}) or into a configuration carrying the SQL
 * statements used for pre-checking ({@link #doPreCheckSplit}).
 */
public final class ReaderSplitUtil {
    private static final Logger LOG = LoggerFactory
            .getLogger(ReaderSplitUtil.class);

    /**
     * Extra factor applied to the per-table split count when a connection holds
     * exactly one table and splitting by primary key is enabled.
     */
    private static final int SINGLE_TABLE_SPLIT_FACTOR = 5;

    /**
     * Splits the job configuration into a list of slice configurations.
     *
     * <p>For every entry under {@code Constant.CONN_MARK} a copy of the original
     * configuration is made, its JDBC url and load-balance mark are set and the
     * connection list is removed. Then:
     * <ul>
     *   <li>table mode with a non-blank split key and a per-table split count
     *       greater than 1: every table is handed to
     *       {@code SingleTableSplitUtil.splitSingleTable};</li>
     *   <li>table mode otherwise: one slice per table with a generated
     *       {@code Key.QUERY_SQL};</li>
     *   <li>querySql mode: one slice per configured query.</li>
     * </ul>
     *
     * @param originalSliceConfig the job-level reader configuration
     * @param adviceNumber        the channel count, i.e. the number of concurrent tasks
     * @return the slice configurations, in connection order
     * @throws IllegalArgumentException in table mode, if a connection has no tables
     */
    public static List<Configuration> doSplit(
            Configuration originalSliceConfig, int adviceNumber) {
        boolean isTableMode = originalSliceConfig.getBool(Constant.IS_TABLE_MODE).booleanValue();
        int eachTableShouldSplittedNumber = -1;
        if (isTableMode) {
            // adviceNumber is the channel count, i.e. the number of concurrent DataX tasks.
            // eachTableShouldSplittedNumber is how many pieces a single table should be cut
            // into; because it is rounded up it may no longer be proportional to adviceNumber.
            eachTableShouldSplittedNumber = calculateEachTableShouldSplittedNumber(
                    adviceNumber, originalSliceConfig.getInt(Constant.TABLE_NUMBER_MARK));
        }

        String column = originalSliceConfig.getString(Key.COLUMN);
        String where = originalSliceConfig.getString(Key.WHERE, null);

        List<Object> conns = originalSliceConfig.getList(Constant.CONN_MARK, Object.class);

        List<Configuration> splittedConfigs = new ArrayList<Configuration>();

        for (int i = 0, len = conns.size(); i < len; i++) {
            Configuration sliceConfig = originalSliceConfig.clone();

            Configuration connConf = Configuration.from(conns.get(i).toString());
            String jdbcUrl = connConf.getString(Key.JDBC_URL);
            sliceConfig.set(Key.JDBC_URL, jdbcUrl);

            // Tag the slice with the ip/port taken from the jdbcUrl so that core can
            // perform a meaningful shuffle based on resource usage.
            sliceConfig.set(CommonConstant.LOAD_BALANCE_RESOURCE_MARK, DataBaseType.parseIpFromJdbcUrl(jdbcUrl));

            sliceConfig.remove(Constant.CONN_MARK);

            Configuration tempSlice;

            if (isTableMode) {
                // Configured in table mode.
                // Table names were already expanded and backtick-processed earlier,
                // so they can be used directly.
                List<String> tables = connConf.getList(Key.TABLE, String.class);

                Validate.isTrue(null != tables && !tables.isEmpty(), "您读取数据库表配置错误.");

                String splitPk = originalSliceConfig.getString(Key.SPLIT_PK, null);

                // The final number of pieces is not necessarily eachTableShouldSplittedNumber.
                boolean needSplitTable = eachTableShouldSplittedNumber > 1
                        && StringUtils.isNotBlank(splitPk);
                if (needSplitTable) {
                    // Use a per-connection value: mutating the shared base count here
                    // would compound the factor across connections (x5, x25, ...).
                    int splitNumberForThisConn = eachTableShouldSplittedNumber;
                    if (tables.size() == 1) {
                        // A single table is cut into more pieces than the base count.
                        // Historically this was num * 2 + 1; the "+ 1" was dropped
                        // because it caused a long tail.
                        splitNumberForThisConn = eachTableShouldSplittedNumber * SINGLE_TABLE_SPLIT_FACTOR;
                    }
                    // Try to cut every table into splitNumberForThisConn pieces.
                    for (String table : tables) {
                        tempSlice = sliceConfig.clone();
                        tempSlice.set(Key.TABLE, table);

                        List<Configuration> splittedSlices = SingleTableSplitUtil
                                .splitSingleTable(tempSlice, splitNumberForThisConn);

                        splittedConfigs.addAll(splittedSlices);
                    }
                } else {
                    // No primary-key splitting: one slice per table with a full query.
                    for (String table : tables) {
                        tempSlice = sliceConfig.clone();
                        tempSlice.set(Key.TABLE, table);
                        String queryColumn = HintUtil.buildQueryColumn(jdbcUrl, table, column);
                        tempSlice.set(Key.QUERY_SQL, SingleTableSplitUtil.buildQuerySql(queryColumn, table, where));
                        splittedConfigs.add(tempSlice);
                    }
                }
            } else {
                // Configured in querySql mode.
                List<String> sqls = connConf.getList(Key.QUERY_SQL, String.class);

                // TODO: should we check whether multiple statements were configured?
                for (String querySql : sqls) {
                    tempSlice = sliceConfig.clone();
                    tempSlice.set(Key.QUERY_SQL, querySql);
                    splittedConfigs.add(tempSlice);
                }
            }
        }

        return splittedConfigs;
    }

    /**
     * Builds the configuration used for pre-checking: a copy of the original in
     * which every {@code connection[i]} entry carries the list of query SQLs
     * under {@code Key.QUERY_SQL} and, in table mode with a non-empty split key,
     * the split-key SQLs under {@code Key.SPLIT_PK_SQL}.
     *
     * @param originalSliceConfig the job-level reader configuration
     * @return a cloned configuration with the per-connection SQL lists filled in
     * @throws IllegalArgumentException in table mode, if a connection has no tables
     */
    public static Configuration doPreCheckSplit(Configuration originalSliceConfig) {
        Configuration queryConfig = originalSliceConfig.clone();
        boolean isTableMode = originalSliceConfig.getBool(Constant.IS_TABLE_MODE).booleanValue();

        String splitPK = originalSliceConfig.getString(Key.SPLIT_PK);
        String column = originalSliceConfig.getString(Key.COLUMN);
        String where = originalSliceConfig.getString(Key.WHERE, null);

        List<Object> conns = queryConfig.getList(Constant.CONN_MARK, Object.class);

        for (int i = 0, len = conns.size(); i < len; i++) {
            Configuration connConf = Configuration.from(conns.get(i).toString());
            List<String> querys = new ArrayList<String>();
            List<String> splitPkQuerys = new ArrayList<String>();
            String connPath = String.format("connection[%d]", i);

            if (isTableMode) {
                // Configured in table mode.
                // Table names were already expanded and backtick-processed earlier,
                // so they can be used directly.
                List<String> tables = connConf.getList(Key.TABLE, String.class);
                Validate.isTrue(null != tables && !tables.isEmpty(), "您读取数据库表配置错误.");
                for (String table : tables) {
                    querys.add(SingleTableSplitUtil.buildQuerySql(column, table, where));
                    if (splitPK != null && !splitPK.isEmpty()) {
                        splitPkQuerys.add(SingleTableSplitUtil.genPKSql(splitPK.trim(), table, where));
                    }
                }
                if (!splitPkQuerys.isEmpty()) {
                    connConf.set(Key.SPLIT_PK_SQL, splitPkQuerys);
                }
            } else {
                // Configured in querySql mode: take the configured statements as they are.
                List<String> sqls = connConf.getList(Key.QUERY_SQL,
                        String.class);
                querys.addAll(sqls);
            }

            // Both modes store the collected queries back under this connection's path.
            connConf.set(Key.QUERY_SQL, querys);
            queryConfig.set(connPath, connConf);
        }
        return queryConfig;
    }

    /**
     * Computes how many pieces each table should be cut into.
     *
     * @param adviceNumber the advised total number of slices
     * @param tableNumber  the number of tables
     * @return {@code adviceNumber / tableNumber}, rounded up
     */
    private static int calculateEachTableShouldSplittedNumber(int adviceNumber,
                                                              int tableNumber) {
        // Multiply by 1.0 first so the division is done in floating point.
        double tempNum = 1.0 * adviceNumber / tableNumber;

        return (int) Math.ceil(tempNum);
    }

}