package com.alibaba.datax.plugin.rdbms.reader; import com.alibaba.datax.common.element.BoolColumn; import com.alibaba.datax.common.element.BytesColumn; import com.alibaba.datax.common.element.DateColumn; import com.alibaba.datax.common.element.DoubleColumn; import com.alibaba.datax.common.element.LongColumn; import com.alibaba.datax.common.element.Record; import com.alibaba.datax.common.element.StringColumn; import com.alibaba.datax.common.exception.DataXException; import com.alibaba.datax.common.plugin.RecordSender; import com.alibaba.datax.common.plugin.TaskPluginCollector; import com.alibaba.datax.common.statistics.PerfRecord; import com.alibaba.datax.common.statistics.PerfTrace; import com.alibaba.datax.common.util.Configuration; import com.alibaba.datax.plugin.rdbms.reader.util.OriginalConfPretreatmentUtil; import com.alibaba.datax.plugin.rdbms.reader.util.PreCheckTask; import com.alibaba.datax.plugin.rdbms.reader.util.ReaderSplitUtil; import com.alibaba.datax.plugin.rdbms.reader.util.SingleTableSplitUtil; import com.alibaba.datax.plugin.rdbms.util.DBUtil; import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode; import com.alibaba.datax.plugin.rdbms.util.DataBaseType; import com.alibaba.datax.plugin.rdbms.util.RdbmsException; import com.google.common.collect.Lists; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.sql.Connection; import java.sql.ResultSet; import java.sql.ResultSetMetaData; import java.sql.Types; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; public class CommonRdbmsReader { public static class Job { private static final Logger LOG = LoggerFactory .getLogger(Job.class); public Job(DataBaseType dataBaseType) { OriginalConfPretreatmentUtil.DATABASE_TYPE = dataBaseType; SingleTableSplitUtil.DATABASE_TYPE = dataBaseType; } public void init(Configuration originalConfig) { OriginalConfPretreatmentUtil.doPretreatment(originalConfig); LOG.debug("After job init(), job config now is:[\n{}\n]", originalConfig.toJSON()); } public void preCheck(Configuration originalConfig,DataBaseType dataBaseType) { /*检查每个表是否有读权限,以及querySql跟splik Key是否正确*/ Configuration queryConf = ReaderSplitUtil.doPreCheckSplit(originalConfig); String splitPK = queryConf.getString(Key.SPLIT_PK); List<Object> connList = queryConf.getList(Constant.CONN_MARK, Object.class); String username = queryConf.getString(Key.USERNAME); String password = queryConf.getString(Key.PASSWORD); ExecutorService exec; if (connList.size() < 10){ exec = Executors.newFixedThreadPool(connList.size()); }else{ exec = Executors.newFixedThreadPool(10); } Collection<PreCheckTask> taskList = new ArrayList<PreCheckTask>(); for (int i = 0, len = connList.size(); i < len; i++){ Configuration connConf = Configuration.from(connList.get(i).toString()); PreCheckTask t = new PreCheckTask(username,password,connConf,dataBaseType,splitPK); taskList.add(t); } List<Future<Boolean>> results = Lists.newArrayList(); try { results = exec.invokeAll(taskList); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } for (Future<Boolean> result : results){ try { result.get(); } catch (ExecutionException e) { DataXException de = (DataXException) e.getCause(); throw de; }catch (InterruptedException e) { Thread.currentThread().interrupt(); } } exec.shutdownNow(); } public List<Configuration> split(Configuration originalConfig, int adviceNumber) { return ReaderSplitUtil.doSplit(originalConfig, adviceNumber); } public void post(Configuration originalConfig) { // do nothing } public void destroy(Configuration originalConfig) { // do nothing } } public static class Task { private static final Logger LOG = LoggerFactory .getLogger(Task.class); private static final boolean IS_DEBUG = LOG.isDebugEnabled(); protected final byte[] EMPTY_CHAR_ARRAY = new byte[0]; private DataBaseType dataBaseType; private int taskGroupId = -1; private int taskId=-1; private String username; private String password; private String jdbcUrl; private String mandatoryEncoding; // 作为日志显示信息时,需要附带的通用信息。比如信息所对应的数据库连接等信息,针对哪个表做的操作 private String basicMsg; public Task(DataBaseType dataBaseType) { this(dataBaseType, -1, -1); } public Task(DataBaseType dataBaseType,int taskGropuId, int taskId) { this.dataBaseType = dataBaseType; this.taskGroupId = taskGropuId; this.taskId = taskId; } public void init(Configuration readerSliceConfig) { /* for database connection */ this.username = readerSliceConfig.getString(Key.USERNAME); this.password = readerSliceConfig.getString(Key.PASSWORD); this.jdbcUrl = readerSliceConfig.getString(Key.JDBC_URL); //ob10的处理 if (this.jdbcUrl.startsWith(com.alibaba.datax.plugin.rdbms.writer.Constant.OB10_SPLIT_STRING) && this.dataBaseType == DataBaseType.MySql) { String[] ss = this.jdbcUrl.split(com.alibaba.datax.plugin.rdbms.writer.Constant.OB10_SPLIT_STRING_PATTERN); if (ss.length != 3) { throw DataXException .asDataXException( DBUtilErrorCode.JDBC_OB10_ADDRESS_ERROR, "JDBC OB10格式错误,请联系askdatax"); } LOG.info("this is ob1_0 jdbc url."); this.username = ss[1].trim() +":"+this.username; this.jdbcUrl = ss[2]; LOG.info("this is ob1_0 jdbc url. user=" + this.username + " :url=" + this.jdbcUrl); } this.mandatoryEncoding = readerSliceConfig.getString(Key.MANDATORY_ENCODING, ""); basicMsg = String.format("jdbcUrl:[%s]", this.jdbcUrl); } public void startRead(Configuration readerSliceConfig, RecordSender recordSender, TaskPluginCollector taskPluginCollector, int fetchSize) { String querySql = readerSliceConfig.getString(Key.QUERY_SQL); String table = readerSliceConfig.getString(Key.TABLE); PerfTrace.getInstance().addTaskDetails(taskId, table + "," + basicMsg); LOG.info("Begin to read record by Sql: [{}\n] {}.", querySql, basicMsg); PerfRecord queryPerfRecord = new PerfRecord(taskGroupId,taskId, PerfRecord.PHASE.SQL_QUERY); queryPerfRecord.start(); Connection conn = DBUtil.getConnection(this.dataBaseType, jdbcUrl, username, password); // session config .etc related DBUtil.dealWithSessionConfig(conn, readerSliceConfig, this.dataBaseType, basicMsg); int columnNumber = 0; ResultSet rs = null; try { rs = DBUtil.query(conn, querySql, fetchSize); queryPerfRecord.end(); ResultSetMetaData metaData = rs.getMetaData(); columnNumber = metaData.getColumnCount(); //这个统计干净的result_Next时间 PerfRecord allResultPerfRecord = new PerfRecord(taskGroupId, taskId, PerfRecord.PHASE.RESULT_NEXT_ALL); allResultPerfRecord.start(); long rsNextUsedTime = 0; long lastTime = System.nanoTime(); while (rs.next()) { rsNextUsedTime += (System.nanoTime() - lastTime); this.transportOneRecord(recordSender, rs, metaData, columnNumber, mandatoryEncoding, taskPluginCollector); lastTime = System.nanoTime(); } allResultPerfRecord.end(rsNextUsedTime); //目前大盘是依赖这个打印,而之前这个Finish read record是包含了sql查询和result next的全部时间 LOG.info("Finished read record by Sql: [{}\n] {}.", querySql, basicMsg); }catch (Exception e) { throw RdbmsException.asQueryException(this.dataBaseType, e, querySql, table, username); } finally { DBUtil.closeDBResources(null, conn); } } public void post(Configuration originalConfig) { // do nothing } public void destroy(Configuration originalConfig) { // do nothing } protected Record transportOneRecord(RecordSender recordSender, ResultSet rs, ResultSetMetaData metaData, int columnNumber, String mandatoryEncoding, TaskPluginCollector taskPluginCollector) { Record record = buildRecord(recordSender,rs,metaData,columnNumber,mandatoryEncoding,taskPluginCollector); recordSender.sendToWriter(record); return record; } protected Record buildRecord(RecordSender recordSender,ResultSet rs, ResultSetMetaData metaData, int columnNumber, String mandatoryEncoding, TaskPluginCollector taskPluginCollector) { Record record = recordSender.createRecord(); try { for (int i = 1; i <= columnNumber; i++) { switch (metaData.getColumnType(i)) { case Types.CHAR: case Types.NCHAR: case Types.VARCHAR: case Types.LONGVARCHAR: case Types.NVARCHAR: case Types.LONGNVARCHAR: String rawData; if(StringUtils.isBlank(mandatoryEncoding)){ rawData = rs.getString(i); }else{ rawData = new String((rs.getBytes(i) == null ? EMPTY_CHAR_ARRAY : rs.getBytes(i)), mandatoryEncoding); } record.addColumn(new StringColumn(rawData)); break; case Types.CLOB: case Types.NCLOB: record.addColumn(new StringColumn(rs.getString(i))); break; case Types.SMALLINT: case Types.TINYINT: case Types.INTEGER: case Types.BIGINT: record.addColumn(new LongColumn(rs.getString(i))); break; case Types.NUMERIC: case Types.DECIMAL: record.addColumn(new DoubleColumn(rs.getString(i))); break; case Types.FLOAT: case Types.REAL: case Types.DOUBLE: record.addColumn(new DoubleColumn(rs.getString(i))); break; case Types.TIME: record.addColumn(new DateColumn(rs.getTime(i))); break; // for mysql bug, see http://bugs.mysql.com/bug.php?id=35115 case Types.DATE: if (metaData.getColumnTypeName(i).equalsIgnoreCase("year")) { record.addColumn(new LongColumn(rs.getInt(i))); } else { record.addColumn(new DateColumn(rs.getDate(i))); } break; case Types.TIMESTAMP: record.addColumn(new DateColumn(rs.getTimestamp(i))); break; case Types.BINARY: case Types.VARBINARY: case Types.BLOB: case Types.LONGVARBINARY: record.addColumn(new BytesColumn(rs.getBytes(i))); break; // warn: bit(1) -> Types.BIT 可使用BoolColumn // warn: bit(>1) -> Types.VARBINARY 可使用BytesColumn case Types.BOOLEAN: case Types.BIT: record.addColumn(new BoolColumn(rs.getBoolean(i))); break; case Types.NULL: String stringData = null; if(rs.getObject(i) != null) { stringData = rs.getObject(i).toString(); } record.addColumn(new StringColumn(stringData)); break; default: throw DataXException .asDataXException( DBUtilErrorCode.UNSUPPORTED_TYPE, String.format( "您的配置文件中的列配置信息有误. 因为DataX 不支持数据库读取这种字段类型. 字段名:[%s], 字段名称:[%s], 字段Java类型:[%s]. 请尝试使用数据库函数将其转换datax支持的类型 或者不同步该字段 .", metaData.getColumnName(i), metaData.getColumnType(i), metaData.getColumnClassName(i))); } } } catch (Exception e) { if (IS_DEBUG) { LOG.debug("read data " + record.toString() + " occur exception:", e); } //TODO 这里识别为脏数据靠谱吗? taskPluginCollector.collectDirtyRecord(record, e); if (e instanceof DataXException) { throw (DataXException) e; } } return record; } } }