package com.alibaba.datax.plugin.rdbms.reader.util;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.rdbms.reader.Constant;
import com.alibaba.datax.plugin.rdbms.reader.Key;
import com.alibaba.datax.plugin.rdbms.util.*;
import com.alibaba.fastjson.JSON;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.math.BigInteger;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.Types;
import java.util.ArrayList;
import java.util.List;
public class SingleTableSplitUtil {
private static final Logger LOG = LoggerFactory
.getLogger(SingleTableSplitUtil.class);
public static DataBaseType DATABASE_TYPE;
private SingleTableSplitUtil() {
}
public static List<Configuration> splitSingleTable(
Configuration configuration, int adviceNum) {
List<Configuration> pluginParams = new ArrayList<Configuration>();
List<String> rangeList;
String splitPkName = configuration.getString(Key.SPLIT_PK);
String column = configuration.getString(Key.COLUMN);
String table = configuration.getString(Key.TABLE);
String where = configuration.getString(Key.WHERE, null);
boolean hasWhere = StringUtils.isNotBlank(where);
//String splitMode = configuration.getString(Key.SPLIT_MODE, "");
//if (Constant.SPLIT_MODE_RANDOMSAMPLE.equals(splitMode) && DATABASE_TYPE == DataBaseType.Oracle) {
if (DATABASE_TYPE == DataBaseType.Oracle) {
rangeList = genSplitSqlForOracle(splitPkName, table, where,
configuration, adviceNum);
// warn: mysql etc to be added...
} else {
Pair<Object, Object> minMaxPK = getPkRange(configuration);
if (null == minMaxPK) {
throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_SPLIT_PK,
"根据切分主键切分表失败. DataX 仅支持切分主键为一个,并且类型为整数或者字符串类型. 请尝试使用其他的切分主键或者联系 DBA 进行处理.");
}
configuration.set(Key.QUERY_SQL, buildQuerySql(column, table, where));
if (null == minMaxPK.getLeft() || null == minMaxPK.getRight()) {
// 切分后获取到的start/end 有 Null 的情况
pluginParams.add(configuration);
return pluginParams;
}
boolean isStringType = Constant.PK_TYPE_STRING.equals(configuration
.getString(Constant.PK_TYPE));
boolean isLongType = Constant.PK_TYPE_LONG.equals(configuration
.getString(Constant.PK_TYPE));
if (isStringType) {
rangeList = RdbmsRangeSplitWrap.splitAndWrap(
String.valueOf(minMaxPK.getLeft()),
String.valueOf(minMaxPK.getRight()), adviceNum,
splitPkName, "'", DATABASE_TYPE);
} else if (isLongType) {
rangeList = RdbmsRangeSplitWrap.splitAndWrap(
new BigInteger(minMaxPK.getLeft().toString()),
new BigInteger(minMaxPK.getRight().toString()),
adviceNum, splitPkName);
} else {
throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_SPLIT_PK,
"您配置的切分主键(splitPk) 类型 DataX 不支持. DataX 仅支持切分主键为一个,并且类型为整数或者字符串类型. 请尝试使用其他的切分主键或者联系 DBA 进行处理.");
}
}
String tempQuerySql;
List<String> allQuerySql = new ArrayList<String>();
if (null != rangeList && !rangeList.isEmpty()) {
for (String range : rangeList) {
Configuration tempConfig = configuration.clone();
tempQuerySql = buildQuerySql(column, table, where)
+ (hasWhere ? " and " : " where ") + range;
allQuerySql.add(tempQuerySql);
tempConfig.set(Key.QUERY_SQL, tempQuerySql);
pluginParams.add(tempConfig);
}
} else {
//pluginParams.add(configuration); // this is wrong for new & old split
Configuration tempConfig = configuration.clone();
tempQuerySql = buildQuerySql(column, table, where)
+ (hasWhere ? " and " : " where ")
+ String.format(" %s IS NOT NULL", splitPkName);
allQuerySql.add(tempQuerySql);
tempConfig.set(Key.QUERY_SQL, tempQuerySql);
pluginParams.add(tempConfig);
}
// deal pk is null
Configuration tempConfig = configuration.clone();
tempQuerySql = buildQuerySql(column, table, where)
+ (hasWhere ? " and " : " where ")
+ String.format(" %s IS NULL", splitPkName);
allQuerySql.add(tempQuerySql);
LOG.info("After split(), allQuerySql=[\n{}\n].",
StringUtils.join(allQuerySql, "\n"));
tempConfig.set(Key.QUERY_SQL, tempQuerySql);
pluginParams.add(tempConfig);
return pluginParams;
}
public static String buildQuerySql(String column, String table,
String where) {
String querySql;
if (StringUtils.isBlank(where)) {
querySql = String.format(Constant.QUERY_SQL_TEMPLATE_WITHOUT_WHERE,
column, table);
} else {
querySql = String.format(Constant.QUERY_SQL_TEMPLATE, column,
table, where);
}
return querySql;
}
@SuppressWarnings("resource")
private static Pair<Object, Object> getPkRange(Configuration configuration) {
String pkRangeSQL = genPKRangeSQL(configuration);
int fetchSize = configuration.getInt(Constant.FETCH_SIZE);
String jdbcURL = configuration.getString(Key.JDBC_URL);
String username = configuration.getString(Key.USERNAME);
String password = configuration.getString(Key.PASSWORD);
String table = configuration.getString(Key.TABLE);
Connection conn = DBUtil.getConnection(DATABASE_TYPE, jdbcURL, username, password);
Pair<Object, Object> minMaxPK = checkSplitPk(conn, pkRangeSQL, fetchSize, table, username, configuration);
DBUtil.closeDBResources(null, null, conn);
return minMaxPK;
}
public static void precheckSplitPk(Connection conn, String pkRangeSQL, int fetchSize,
String table, String username) {
Pair<Object, Object> minMaxPK = checkSplitPk(conn, pkRangeSQL, fetchSize, table, username, null);
if (null == minMaxPK) {
throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_SPLIT_PK,
"根据切分主键切分表失败. DataX 仅支持切分主键为一个,并且类型为整数或者字符串类型. 请尝试使用其他的切分主键或者联系 DBA 进行处理.");
}
}
/**
* 检测splitPk的配置是否正确。
* configuration为null, 是precheck的逻辑,不需要回写PK_TYPE到configuration中
*
*/
private static Pair<Object, Object> checkSplitPk(Connection conn, String pkRangeSQL, int fetchSize, String table,
String username, Configuration configuration) {
LOG.info("split pk [sql={}] is running... ", pkRangeSQL);
ResultSet rs = null;
Pair<Object, Object> minMaxPK = null;
try {
try {
rs = DBUtil.query(conn, pkRangeSQL, fetchSize);
}catch (Exception e) {
throw RdbmsException.asQueryException(DATABASE_TYPE, e, pkRangeSQL,table,username);
}
ResultSetMetaData rsMetaData = rs.getMetaData();
if (isPKTypeValid(rsMetaData)) {
if (isStringType(rsMetaData.getColumnType(1))) {
if(configuration != null) {
configuration
.set(Constant.PK_TYPE, Constant.PK_TYPE_STRING);
}
while (DBUtil.asyncResultSetNext(rs)) {
minMaxPK = new ImmutablePair<Object, Object>(
rs.getString(1), rs.getString(2));
}
} else if (isLongType(rsMetaData.getColumnType(1))) {
if(configuration != null) {
configuration.set(Constant.PK_TYPE, Constant.PK_TYPE_LONG);
}
while (DBUtil.asyncResultSetNext(rs)) {
minMaxPK = new ImmutablePair<Object, Object>(
rs.getString(1), rs.getString(2));
// check: string shouldn't contain '.', for oracle
String minMax = rs.getString(1) + rs.getString(2);
if (StringUtils.contains(minMax, '.')) {
throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_SPLIT_PK,
"您配置的DataX切分主键(splitPk)有误. 因为您配置的切分主键(splitPk) 类型 DataX 不支持. DataX 仅支持切分主键为一个,并且类型为整数或者字符串类型. 请尝试使用其他的切分主键或者联系 DBA 进行处理.");
}
}
} else {
throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_SPLIT_PK,
"您配置的DataX切分主键(splitPk)有误. 因为您配置的切分主键(splitPk) 类型 DataX 不支持. DataX 仅支持切分主键为一个,并且类型为整数或者字符串类型. 请尝试使用其他的切分主键或者联系 DBA 进行处理.");
}
} else {
throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_SPLIT_PK,
"您配置的DataX切分主键(splitPk)有误. 因为您配置的切分主键(splitPk) 类型 DataX 不支持. DataX 仅支持切分主键为一个,并且类型为整数或者字符串类型. 请尝试使用其他的切分主键或者联系 DBA 进行处理.");
}
} catch(DataXException e) {
throw e;
} catch (Exception e) {
throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_SPLIT_PK, "DataX尝试切分表发生错误. 请检查您的配置并作出修改.", e);
} finally {
DBUtil.closeDBResources(rs, null, null);
}
return minMaxPK;
}
private static boolean isPKTypeValid(ResultSetMetaData rsMetaData) {
boolean ret = false;
try {
int minType = rsMetaData.getColumnType(1);
int maxType = rsMetaData.getColumnType(2);
boolean isNumberType = isLongType(minType);
boolean isStringType = isStringType(minType);
if (minType == maxType && (isNumberType || isStringType)) {
ret = true;
}
} catch (Exception e) {
throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_SPLIT_PK,
"DataX获取切分主键(splitPk)字段类型失败. 该错误通常是系统底层异常导致. 请联系旺旺:askdatax或者DBA处理.");
}
return ret;
}
// warn: Types.NUMERIC is used for oracle! because oracle use NUMBER to
// store INT, SMALLINT, INTEGER etc, and only oracle need to concern
// Types.NUMERIC
private static boolean isLongType(int type) {
boolean isValidLongType = type == Types.BIGINT || type == Types.INTEGER
|| type == Types.SMALLINT || type == Types.TINYINT;
switch (SingleTableSplitUtil.DATABASE_TYPE) {
case Oracle:
isValidLongType |= type == Types.NUMERIC;
break;
default:
break;
}
return isValidLongType;
}
private static boolean isStringType(int type) {
return type == Types.CHAR || type == Types.NCHAR
|| type == Types.VARCHAR || type == Types.LONGVARCHAR
|| type == Types.NVARCHAR;
}
private static String genPKRangeSQL(Configuration configuration) {
String splitPK = configuration.getString(Key.SPLIT_PK).trim();
String table = configuration.getString(Key.TABLE).trim();
String where = configuration.getString(Key.WHERE, null);
return genPKSql(splitPK,table,where);
}
public static String genPKSql(String splitPK, String table, String where){
String minMaxTemplate = "SELECT MIN(%s),MAX(%s) FROM %s";
String pkRangeSQL = String.format(minMaxTemplate, splitPK, splitPK,
table);
if (StringUtils.isNotBlank(where)) {
pkRangeSQL = String.format("%s WHERE (%s AND %s IS NOT NULL)",
pkRangeSQL, where, splitPK);
}
return pkRangeSQL;
}
/**
* support Number and String split
* */
public static List<String> genSplitSqlForOracle(String splitPK,
String table, String where, Configuration configuration,
int adviceNum) {
if (adviceNum < 1) {
throw new IllegalArgumentException(String.format(
"切分份数不能小于1. 此处:adviceNum=[%s].", adviceNum));
} else if (adviceNum == 1) {
return null;
}
String whereSql = String.format("%s IS NOT NULL", splitPK);
if (StringUtils.isNotBlank(where)) {
whereSql = String.format(" WHERE (%s) AND (%s) ", whereSql, where);
} else {
whereSql = String.format(" WHERE (%s) ", whereSql);
}
Double percentage = configuration.getDouble(Key.SAMPLE_PERCENTAGE, 0.1);
String sampleSqlTemplate = "SELECT * FROM ( SELECT %s FROM %s SAMPLE (%s) %s ORDER BY DBMS_RANDOM.VALUE) WHERE ROWNUM <= %s ORDER by %s ASC";
String splitSql = String.format(sampleSqlTemplate, splitPK, table,
percentage, whereSql, adviceNum, splitPK);
int fetchSize = configuration.getInt(Constant.FETCH_SIZE, 32);
String jdbcURL = configuration.getString(Key.JDBC_URL);
String username = configuration.getString(Key.USERNAME);
String password = configuration.getString(Key.PASSWORD);
Connection conn = DBUtil.getConnection(DATABASE_TYPE, jdbcURL,
username, password);
LOG.info("split pk [sql={}] is running... ", splitSql);
ResultSet rs = null;
List<Pair<Object, Integer>> splitedRange = new ArrayList<Pair<Object, Integer>>();
try {
try {
rs = DBUtil.query(conn, splitSql, fetchSize);
} catch (Exception e) {
throw RdbmsException.asQueryException(DATABASE_TYPE, e,
splitSql, table, username);
}
if (configuration != null) {
configuration
.set(Constant.PK_TYPE, Constant.PK_TYPE_MONTECARLO);
}
ResultSetMetaData rsMetaData = rs.getMetaData();
while (DBUtil.asyncResultSetNext(rs)) {
ImmutablePair<Object, Integer> eachPoint = new ImmutablePair<Object, Integer>(
rs.getObject(1), rsMetaData.getColumnType(1));
splitedRange.add(eachPoint);
}
} catch (DataXException e) {
throw e;
} catch (Exception e) {
throw DataXException.asDataXException(
DBUtilErrorCode.ILLEGAL_SPLIT_PK,
"DataX尝试切分表发生错误. 请检查您的配置并作出修改.", e);
} finally {
DBUtil.closeDBResources(rs, null, null);
}
LOG.debug(JSON.toJSONString(splitedRange));
List<String> rangeSql = new ArrayList<String>();
int splitedRangeSize = splitedRange.size();
// warn: splitedRangeSize may be 0 or 1,切分规则为IS NULL以及 IS NOT NULL
// demo: Parameter rangeResult can not be null and its length can not <2. detail:rangeResult=[24999930].
if (splitedRangeSize >= 2) {
// warn: oracle Number is long type here
if (isLongType(splitedRange.get(0).getRight())) {
BigInteger[] integerPoints = new BigInteger[splitedRange.size()];
for (int i = 0; i < splitedRangeSize; i++) {
integerPoints[i] = new BigInteger(splitedRange.get(i)
.getLeft().toString());
}
rangeSql.addAll(RdbmsRangeSplitWrap.wrapRange(integerPoints,
splitPK));
// its ok if splitedRangeSize is 1
rangeSql.add(RdbmsRangeSplitWrap.wrapFirstLastPoint(
integerPoints[0], integerPoints[splitedRangeSize - 1],
splitPK));
} else if (isStringType(splitedRange.get(0).getRight())) {
// warn: treated as string type
String[] stringPoints = new String[splitedRange.size()];
for (int i = 0; i < splitedRangeSize; i++) {
stringPoints[i] = new String(splitedRange.get(i).getLeft()
.toString());
}
rangeSql.addAll(RdbmsRangeSplitWrap.wrapRange(stringPoints,
splitPK, "'", DATABASE_TYPE));
// its ok if splitedRangeSize is 1
rangeSql.add(RdbmsRangeSplitWrap.wrapFirstLastPoint(
stringPoints[0], stringPoints[splitedRangeSize - 1],
splitPK, "'", DATABASE_TYPE));
} else {
throw DataXException
.asDataXException(
DBUtilErrorCode.ILLEGAL_SPLIT_PK,
"您配置的DataX切分主键(splitPk)有误. 因为您配置的切分主键(splitPk) 类型 DataX 不支持. DataX 仅支持切分主键为一个,并且类型为整数或者字符串类型. 请尝试使用其他的切分主键或者联系 DBA 进行处理.");
}
}
return rangeSql;
}
}