package com.alibaba.datax.plugin.writer.hbase094xwriter;

import com.alibaba.datax.common.element.DoubleColumn;
import com.alibaba.datax.common.element.LongColumn;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.util.Configuration;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;
import org.apache.commons.lang3.time.DateUtils;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.sql.Timestamp;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * Task that turns one reader {@link Record} into one HBase {@link Put}.
 *
 * <p>The rowkey is assembled from the configured rowkey columns, the cell
 * timestamp optionally comes from the configured version column, and every
 * configured column is written as a {@code family:qualifier} cell.
 */
public class NormalTask extends HbaseAbstractTask {
    private static final Logger LOG = LoggerFactory.getLogger(NormalTask.class);

    public NormalTask(Configuration configuration) {
        super(configuration);
    }

    /**
     * Builds the {@link Put} for a single record.
     *
     * @param record the record supplied by the reader side
     * @return a Put keyed by {@link #getRowkey(Record)} holding one cell per
     *         configured column whose converted value is not {@code null}
     * @throws DataXException if a configured column index is not smaller than
     *         the number of columns in the record
     * @throws IllegalArgumentException if a column name is not of the form
     *         {@code family:qualifier} (raised by {@link Validate#isTrue})
     */
    @Override
    public Put convertRecordToPut(Record record) {
        byte[] rowkey = getRowkey(record);
        Put put;
        if (this.versionColumn == null) {
            put = new Put(rowkey);
        } else {
            put = new Put(rowkey, getVersion(record));
        }
        // Apply the configured WAL flag on both paths. Previously it was only
        // applied when no version column was configured, so the setting was
        // silently ignored for versioned writes.
        put.setWriteToWAL(super.walFlag);

        for (Configuration aColumn : columns) {
            Integer index = aColumn.getInt(Key.INDEX);
            String type = aColumn.getString(Key.TYPE);
            ColumnType columnType = ColumnType.getByTypeName(type);
            String name = aColumn.getString(Key.NAME);
            String promptInfo = "Hbasewriter 中,column 的列配置格式应该是:列族:列名. 您配置的列错误:" + name;
            String[] cfAndQualifier = name.split(":");
            Validate.isTrue(cfAndQualifier != null
                    && cfAndQualifier.length == 2
                    && StringUtils.isNotBlank(cfAndQualifier[0])
                    && StringUtils.isNotBlank(cfAndQualifier[1]), promptInfo);

            if (index >= record.getColumnNumber()) {
                throw DataXException.asDataXException(Hbase094xWriterErrorCode.ILLEGAL_VALUE, String.format("您的column配置项中中index值超出范围,根据reader端配置,index的值小于%s,而您配置的值为%s,请检查并修改.", record.getColumnNumber(), index));
            }

            byte[] columnBytes = getColumnByte(columnType, record.getColumn(index));
            // A null conversion result means this column is skipped.
            if (null != columnBytes) {
                put.add(Bytes.toBytes(cfAndQualifier[0]),
                        Bytes.toBytes(cfAndQualifier[1]),
                        columnBytes);
            }
        }
        return put;
    }

    /**
     * Concatenates the configured rowkey parts into the rowkey bytes.
     *
     * <p>A part with index {@code -1} contributes its configured constant
     * value; any other part contributes the converted value of the record
     * column at that index.
     *
     * @param record the record supplied by the reader side
     * @return the concatenated rowkey bytes (empty if no parts are configured)
     * @throws DataXException if a configured index is not smaller than the
     *         number of columns in the record
     */
    public byte[] getRowkey(Record record) {
        byte[] rowkeyBuffer = {};
        for (Configuration aRowkeyColumn : rowkeyColumn) {
            Integer index = aRowkeyColumn.getInt(Key.INDEX);
            String type = aRowkeyColumn.getString(Key.TYPE);
            ColumnType columnType = ColumnType.getByTypeName(type);
            if (index == -1) {
                String value = aRowkeyColumn.getString(Key.VALUE);
                rowkeyBuffer = Bytes.add(rowkeyBuffer, getValueByte(columnType, value));
            } else {
                if (index >= record.getColumnNumber()) {
                    throw DataXException.asDataXException(Hbase094xWriterErrorCode.CONSTRUCT_ROWKEY_ERROR, String.format("您的rowkeyColumn配置项中中index值超出范围,根据reader端配置,index的值小于%s,而您配置的值为%s,请检查并修改.", record.getColumnNumber(), index));
                }
                // NOTE(review): convertRecordToPut treats a null result from
                // getColumnByte as "skip"; here the result is passed straight
                // to Bytes.add. Confirm a rowkey column can never yield null.
                byte[] value = getColumnByte(columnType, record.getColumn(index));
                rowkeyBuffer = Bytes.add(rowkeyBuffer, value);
            }
        }
        return rowkeyBuffer;
    }

    /**
     * Resolves the cell timestamp (version) for a record.
     *
     * <p>With index {@code -1} the configured constant value is used. Otherwise
     * the record column at that index is used: long/double columns are read
     * via {@code asLong()}, any other column is parsed as
     * {@code yyyy-MM-dd HH:mm:ss SSS} and, failing that, as
     * {@code yyyy-MM-dd HH:mm:ss}.
     *
     * @param record the record supplied by the reader side
     * @return the timestamp to use for the Put
     * @throws DataXException if the constant version is negative, the index is
     *         out of range, the column value is null, or the value cannot be
     *         parsed with either date pattern
     */
    public long getVersion(Record record) {
        int index = versionColumn.getInt(Key.INDEX);
        long timestamp;
        if (index == -1) {
            // A fixed, configured time is used as the version.
            timestamp = versionColumn.getLong(Key.VALUE);
            if (timestamp < 0) {
                throw DataXException.asDataXException(Hbase094xWriterErrorCode.CONSTRUCT_VERSION_ERROR, "您指定的版本非法!");
            }
        } else {
            // A record column is used as the version.
            if (index >= record.getColumnNumber()) {
                throw DataXException.asDataXException(Hbase094xWriterErrorCode.CONSTRUCT_VERSION_ERROR, String.format("您的versionColumn配置项中中index值超出范围,根据reader端配置,index的值小于%s,而您配置的值为%s,请检查并修改.", record.getColumnNumber(), index));
            }
            if (record.getColumn(index).getRawData() == null) {
                throw DataXException.asDataXException(Hbase094xWriterErrorCode.CONSTRUCT_VERSION_ERROR, "您指定的版本为空!");
            }

            if (record.getColumn(index) instanceof LongColumn || record.getColumn(index) instanceof DoubleColumn) {
                timestamp = record.getColumn(index).asLong();
            } else {
                // Formatters are created per call, and only on this path:
                // SimpleDateFormat is not thread-safe, so they are not shared.
                SimpleDateFormat dfSeconds = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
                SimpleDateFormat dfMs = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss SSS");
                String text = record.getColumn(index).asString();
                Date date;
                try {
                    date = dfMs.parse(text);
                } catch (ParseException e) {
                    // Millisecond pattern did not match; fall back to seconds.
                    try {
                        date = dfSeconds.parse(text);
                    } catch (ParseException e1) {
                        LOG.info(String.format("您指定第[%s]列作为hbase写入版本,但在尝试用yyyy-MM-dd HH:mm:ss 和 yyyy-MM-dd HH:mm:ss SSS 去解析为Date时均出错,请检查并修改", index));
                        throw DataXException.asDataXException(Hbase094xWriterErrorCode.CONSTRUCT_VERSION_ERROR, e1);
                    }
                }
                timestamp = date.getTime();
            }
        }
        return timestamp;
    }
}