package com.alibaba.datax.plugin.reader.hbasereader.util;
import com.alibaba.datax.common.element.*;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.reader.hbasereader.ColumnType;
import com.alibaba.datax.plugin.reader.hbasereader.HbaseReaderErrorCode;
import com.alibaba.datax.plugin.reader.hbasereader.Key;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
public abstract class MultiVersionTask extends HbaseAbstractTask {
private static byte[] COLON_BYTE;
private int maxVersion;
private List<KeyValue> kvList = new ArrayList<KeyValue>();
private int currentReadPosition = 0;
// 四元组的类型
private ColumnType rowkeyReadoutType = null;
private ColumnType columnReadoutType = null;
private ColumnType timestampReadoutType = null;
private ColumnType valueReadoutType = null;
public MultiVersionTask(Configuration configuration) {
super(configuration);
this.maxVersion = configuration.getInt(Key.MAX_VERSION);
List<String> userConfiguredTetradTypes = configuration.getList(Key.TETRAD_TYPE, String.class);
this.rowkeyReadoutType = ColumnType.getByTypeName(userConfiguredTetradTypes.get(0));
this.columnReadoutType = ColumnType.getByTypeName(userConfiguredTetradTypes.get(1));
this.timestampReadoutType = ColumnType.getByTypeName(userConfiguredTetradTypes.get(2));
this.valueReadoutType = ColumnType.getByTypeName(userConfiguredTetradTypes.get(3));
try {
MultiVersionTask.COLON_BYTE = ":".getBytes("utf8");
} catch (UnsupportedEncodingException e) {
throw DataXException.asDataXException(HbaseReaderErrorCode.PREPAR_READ_ERROR, "系统内部获取 列族与列名冒号分隔符的二进制时失败.", e);
}
}
private void convertKVToLine(KeyValue keyValue, Record record) throws Exception {
byte[] rawRowkey = keyValue.getRow();
long timestamp = keyValue.getTimestamp();
byte[] cfAndQualifierName = Bytes.add(keyValue.getFamily(), MultiVersionTask.COLON_BYTE, keyValue.getQualifier());
record.addColumn(convertBytesToAssignType(this.rowkeyReadoutType, rawRowkey));
record.addColumn(convertBytesToAssignType(this.columnReadoutType, cfAndQualifierName));
// 直接忽略了用户配置的 timestamp 的类型
record.addColumn(new LongColumn(timestamp));
record.addColumn(convertBytesToAssignType(this.valueReadoutType, keyValue.getValue()));
}
private Column convertBytesToAssignType(ColumnType columnType, byte[] byteArray) throws UnsupportedEncodingException {
Column column;
switch (columnType) {
case BOOLEAN:
column = new BoolColumn(byteArray == null ? null : Bytes.toBoolean(byteArray));
break;
case SHORT:
column = new LongColumn(byteArray == null ? null : String.valueOf(Bytes.toShort(byteArray)));
break;
case INT:
column = new LongColumn(byteArray == null ? null : Bytes.toInt(byteArray));
break;
case LONG:
column = new LongColumn(byteArray == null ? null : Bytes.toLong(byteArray));
break;
case BYTES:
column = new BytesColumn(byteArray);
break;
case FLOAT:
column = new DoubleColumn(byteArray == null ? null : Bytes.toFloat(byteArray));
break;
case DOUBLE:
column = new DoubleColumn(byteArray == null ? null : Bytes.toDouble(byteArray));
break;
case STRING:
column = new StringColumn(byteArray == null ? null : new String(byteArray, super.encoding));
break;
case BINARY_STRING:
column = new StringColumn(byteArray == null ? null : Bytes.toStringBinary(byteArray));
break;
default:
throw DataXException.asDataXException(HbaseReaderErrorCode.ILLEGAL_VALUE, "Hbasereader 不支持您配置的列类型:" + columnType);
}
return column;
}
@Override
public boolean fetchLine(Record record) throws Exception {
Result result;
if (this.kvList.size() == this.currentReadPosition) {
result = super.getNextHbaseRow();
if (result == null) {
return false;
}
this.kvList = result.list();
if (this.kvList == null) {
return false;
}
this.currentReadPosition = 0;
}
try {
KeyValue keyValue = this.kvList.get(this.currentReadPosition);
convertKVToLine(keyValue, record);
} catch (Exception e) {
throw e;
} finally {
this.currentReadPosition++;
}
return true;
}
public void setMaxVersions(Scan scan) {
if (this.maxVersion == -1 || this.maxVersion == Integer.MAX_VALUE) {
scan.setMaxVersions();
} else {
scan.setMaxVersions(this.maxVersion);
}
}
}